int main(int argc, char **argv) { MPI_Init(&argc, &argv); int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); int *groups = (int *)malloc(size * sizeof(int)); int *groups2 = (int *)malloc(size * sizeof(int)); int my_type = 1; //This is to be read from a configuration file MPI_Allgather(&my_type, 1, MPI_INT, groups, 1, MPI_INT, MPI_COMM_WORLD); int num_groups = 0; for (int i = 0; i < size; i++) num_groups = num_groups > groups[i] ? num_groups : groups[i]; num_groups++; //The global group MPI_Group global_grp; MPI_Comm_group(MPI_COMM_WORLD, &global_grp); //Create sub-groups and sub-communicators MPI_Group mpigroups[num_groups]; MPI_Comm mpicomms[num_groups]; int count = 0; for (int i = 0; i < num_groups; ++i) { count = 0; for (int j = 0; j < size; ++j) { if (groups[j] == i) { groups2[count++] = j; } } MPI_Group_incl(global_grp, count, groups2, &mpigroups[i]); MPI_Comm_create(MPI_COMM_WORLD, mpigroups[i], &mpicomms[i]); } //coupling procs for (int i = 0; i < 1; ++i) { count = 0; for (int j = 0; j < size; ++j) { if (groups[j] == i) { groups2[count++] = j; } } } // OP initialisation op_mpi_init(argc,argv,2,MPI_COMM_WORLD, mpicomms[1]); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_map pbndbnd = op_decl_map_hdf5(bedges, bedges,1, file, "pbndbnd"); op_map m_test = op_decl_map_hdf5(cells, nodes,4, file, "m_test"); if (m_test == NULL) printf("m_test not found\n"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_dat p_test = op_decl_dat_hdf5(cells ,4,"double",file,"p_test"); if (p_test == NULL) printf("p_test not found\n"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_dump_to_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); //op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //create some temporaries so we can exchange data defined on the boundary double *ptr = NULL; op_dat center = op_decl_dat_temp(bedges, 3, "double", ptr, "center"); op_dat pres = op_decl_dat_temp(bedges, 1, "double", ptr, "pres"); int *ptr2 = NULL; op_dat p_bound2 = op_decl_dat_temp(bedges, 1, "int", ptr2, "p_bound2"); op_dat center2 = op_decl_dat_temp(bedges, 3, "double", ptr, "center2"); op_dat pres2 = op_decl_dat_temp(bedges, 1, "double", ptr, "pres2"); //create import and export handles op_export_handle handle = op_export_init(count, groups2, pbndbnd); op_import_handle handle2 = op_import_init(count, groups2, center); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), op_arg_dat(center, -1, OP_ID, 3, "double", OP_WRITE), op_arg_dat(pres, -1, OP_ID, 1, "double", OP_WRITE)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) { op_printf(" %d %10.5e \n",iter,rms); //Export data op_dat arr[] = {p_bound, center, pres}; op_export_data(handle, 3, arr); //Import data op_dat arr2[] = {p_bound2, center2, pres2}; op_import_data(handle2, 3, arr2); //check whether the two are the same op_par_loop(comparethem, "comparethem", bedges, op_arg_dat(p_bound,-1, OP_ID, 1, "int", OP_READ), op_arg_dat(p_bound2,-1, OP_ID, 1, "int", OP_READ), op_arg_dat(center,-1, OP_ID, 3, "double", OP_READ), op_arg_dat(center2,-1, OP_ID, 3, "double", OP_READ), op_arg_dat(pres,-1, OP_ID, 1, "double", OP_READ), op_arg_dat(pres2,-1, OP_ID, 1, "double", OP_READ)); } } op_timers(&cpu_t2, &wall_t2); double* q = (double *)malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_hdf5(p_q, q, 0, op_get_size(cells)-1); free(q); op_fetch_data_hdf5_file(p_q, "file_name.h5"); //printf("Root process = %d\n",op_is_root()); //output the result dat array to files //op_write_hdf5("new_grid_out.h5"); //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; // read in grid printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation printf("OP init\n"); op_init(argc,argv,7); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_tuner *OP_tuner; op_diagnostic_output(); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE), NULL); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE), NULL); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pecell,4,"float",OP_READ), op_arg_dat(p_q, 1,pecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"float",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pecell,4,"float",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"float",OP_INC ), NULL); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"float",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), NULL); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ), op_arg_gbl(&rms,1,"float",OP_INC), NULL); } // print iteration history rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *pp; double *A, *r, *u, *du; int nnode, nedge; /**------------------------BEGIN I/O and PARTITIONING ---------------------**/ int g_nnode, g_nedge, g_n, g_e; g_nnode = (NN-1)*(NN-1); g_nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); int *g_pp = 0; double *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0; op_printf("Global number of nodes, edges = %d, %d\n",g_nnode,g_nedge); if(my_rank == MPI_ROOT) { g_pp = (int *)malloc(sizeof(int)*2*g_nedge); g_A = (double *)malloc(sizeof(double)*g_nedge); g_r = (double *)malloc(sizeof(double)*g_nnode); g_u = (double *)malloc(sizeof(double)*g_nnode); g_du = (double *)malloc(sizeof(double)*g_nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning g_e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { g_n = i-1 + (j-1)*(NN-1); g_r[g_n] = 0.0f; g_u[g_n] = 0.0f; g_du[g_n] = 0.0f; g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = g_n; g_A[g_e] = -1.0f; g_e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { g_r[g_n] += 0.25f; } else { g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = i2-1 + (j2-1)*(NN-1); g_A[g_e] = 0.25f; g_e++; } } } } } /* Compute local sizes */ nnode = compute_local_size (g_nnode, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); op_printf("Number of nodes, edges on process %d = %d, %d\n" ,my_rank,nnode,nedge); /*Allocate memory to hold local sets, mapping tables and data*/ pp = (int *)malloc(2*sizeof(int)*nedge); A = (double *) malloc(nedge*sizeof(double)); r = (double *) malloc(nnode*sizeof(double)); u = (double *) malloc(nnode*sizeof(double)); du = (double *) malloc(nnode*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_pp, pp, comm_size, g_nedge,nedge, 2); scatter_double_array(g_A, A, comm_size, g_nedge,nedge, 1); scatter_double_array(g_r, r, comm_size, g_nnode,nnode, 1); scatter_double_array(g_u, u, comm_size, g_nnode,nnode, 1); scatter_double_array(g_du, du, comm_size, g_nnode,nnode, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_pp); free(g_A); free(g_r); free(g_u); free(g_du); } /**------------------------END I/O and PARTITIONING ---------------------**/ // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode,"nodes"); op_set edges = op_decl_set(nedge,"edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,1,"double", A, "p_A" ); op_dat p_r = op_decl_dat(nodes,1,"double", r, "p_r" ); op_dat p_u = op_decl_dat(nodes,1,"double", u, "p_u" ); op_dat p_du = op_decl_dat(nodes,1,"double", du,"p_du"); alpha = 1.0f; op_decl_const(1,"double",&alpha); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main iteration loop double u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop(res,"res", edges, op_arg_dat(p_A, -1,OP_ID, 1,"double", OP_READ), op_arg_dat(p_u, 1,ppedge, 1,"double", OP_READ), op_arg_dat(p_du, 0,ppedge, 1,"double", OP_INC), op_arg_gbl(&beta, 1,"double", OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update,"update", nodes, op_arg_dat(p_r, -1,OP_ID, 1,"double",OP_READ), op_arg_dat(p_du, -1,OP_ID, 1,"double",OP_RW), op_arg_dat(p_u, -1,OP_ID, 1,"double",OP_INC), op_arg_gbl(&u_sum,1,"double",OP_INC), op_arg_gbl(&u_max,1,"double",OP_MAX)); op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/g_nnode)); } op_timers(&cpu_t2, &wall_t2); //get results data array op_dat temp = op_mpi_get_data(p_u); //output the result dat array to files print_dat_tofile(temp, "out_grid.dat"); //ASCI //print_dat_tobinfile(temp, "out_grid.bin"); //Binary //print each mpi process's timing info for each kernel op_timing_output(); //print total time for niter interations op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv){ int nnode, nedge, n, e; float dx; op_set nodes, edges; op_map ppedge; op_dat p_A, p_r, p_u, p_du, p_beta, p_u_sum, p_u_max; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); dx = 1.0f / ((float) NN); int *pp = (int *)malloc(sizeof(int)*2*nedge); float *A = (float *)malloc(sizeof(float)*nedge); float *r = (float *)malloc(sizeof(float)*nnode); float *u = (float *)malloc(sizeof(float)*nnode); float *du = (float *)malloc(sizeof(float)*nnode); /* create matrix and r.h.s., and set coordinates needed for renumbering / partitioning */ e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[n] = 0.0f; u[n] = 0.0f; du[n] = 0.0f; pp[2*e] = n; pp[2*e+1] = n; A[e] = -1.0f; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[n] += 0.25f; } else { pp[2*e] = n; pp[2*e+1] = i2-1 + (j2-1)*(NN-1); A[e] = 0.25f; e++; } } } } float u_sum, u_max, beta = 1.0f; /* OP initialisation */ op_init(argc,argv,5); /* declare sets, pointers, and datasets */ op_decl_set(&nodes,nnode, "nodes"); op_decl_set(&edges,nedge, "edges"); op_decl_map(&ppedge,&edges,&nodes,2,pp, "ppedge"); op_decl_vec(&p_A, &edges,1,sizeof(float), A, "p_A" ); op_decl_vec(&p_r, &nodes,1,sizeof(float), r, "p_r" ); op_decl_vec(&p_u, &nodes,1,sizeof(float), u, "p_u" ); op_decl_vec(&p_du,&nodes,1,sizeof(float), du, "p_du"); op_decl_gbl(&p_beta, 1,sizeof(float), &beta, "p_beta"); op_decl_gbl(&p_u_sum, 1,sizeof(float), &u_sum, "p_u_sum"); op_decl_gbl(&p_u_max, 1,sizeof(float), &u_max, "p_u_max"); alpha = 1.0f; op_decl_const(&alpha,1,sizeof(float)); op_diagnostic_output(); /* main iteration loop */ for (int iter=0; iter<NITER; iter++) { op_par_loop_4((void(*)(void*,void*,void*,void*))res,"res", &edges, op_construct_vec_arg(&p_A, OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_u, 1,&ppedge, OP_READ), op_construct_vec_arg(&p_du, 0,&ppedge, OP_INC), op_construct_gbl_arg(&p_beta,OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop_5((void(*)(void*,void*,void*,void*,void*))update,"update", &nodes, op_construct_vec_arg(&p_r, OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_du, OP_NONE, NULL, OP_RW), op_construct_vec_arg(&p_u, OP_NONE, NULL, OP_INC), op_construct_gbl_arg(&p_u_sum,OP_INC), op_construct_gbl_arg(&p_u_max,OP_MAX)); printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } /* print out results */ printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u); /* op_fetch_data(p_du); op_fetch_data(p_r); */ for (int pass=0; pass<1; pass++) { /* if(pass==0) printf("\narray u\n"); else if(pass==1) printf("\narray du\n"); else if(pass==2) printf("\narray r\n"); */ for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) printf(" %7.4f",u[i-1 + (j-1)*(NN-1)]); else if (pass==1) printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) printf(" %7.4f",r[i-1 + (j-1)*(NN-1)]); } printf("\n"); } printf("\n"); } op_timing_output(); op_exit(); }
// // main program // int main(int argc, char **argv){ int my_rank; int comm_size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; double time; double max_time; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int niter; double rms; op_timers(&cpu_t1, &wall_t1); // set constants if(my_rank == MPI_ROOT )printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; // OP initialisation op_init(argc,argv,2); /**------------------------BEGIN Parallel I/O -------------------**/ char file[] = "new_grid.h5";//"new_grid-26mil.h5";//"new_grid.h5"; // declare sets, pointers, datasets and global constants - reading in from file op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); /**------------------------END Parallel I/O -----------------------**/ op_timers(&cpu_t2, &wall_t2); time = wall_t2-wall_t1; MPI_Reduce(&time,&max_time,1,MPI_DOUBLE, MPI_MAX,MPI_ROOT, MPI_COMM_WORLD); if(my_rank==MPI_ROOT)printf("Max total file read time = %f\n",max_time); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_writeback.h5 and new_grid.h5 to //compare two hdf5 files op_write_hdf5("new_grid_out.h5"); //partition with ParMetis //op_partition_geom(p_x); //op_partition_random(cells); //op_partition_kway(pecell); //op_partition_geomkway(p_x, pcell); //partition with PT-Scotch op_partition_ptscotch(pecell); //create halos op_halo_create(); int g_ncell = 0; int* sizes = (int *)malloc(sizeof(int)*comm_size); MPI_Allgather(&cells->size, 1, MPI_INT, sizes, 1, MPI_INT, MPI_COMM_WORLD); for(int i = 0; i<comm_size; i++)g_ncell = g_ncell + sizes[i]; free(sizes); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); niter = 1000; for(int iter=1; iter<=niter; iter++) { //save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } //print iteration history if(my_rank==MPI_ROOT) { rms = sqrt(rms/(double) g_ncell); if (iter%100 == 0) printf("%d %10.5e \n",iter,rms); } } op_timers(&cpu_t2, &wall_t2); //get results data array op_dat temp = op_mpi_get_data(p_q); //output the result dat array to files //op_write_hdf5("new_grid_out.h5"); //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 //free memory allocated to halos op_halo_destroy(); //return all op_dats, op_maps back to original element order op_partition_reverse(); //print each mpi process's timing info for each kernel op_mpi_timing_output(); //print total time for niter interations time = wall_t2-wall_t1; MPI_Reduce(&time,&max_time,1,MPI_DOUBLE, MPI_MAX,MPI_ROOT, MPI_COMM_WORLD); if(my_rank==MPI_ROOT)printf("Max total runtime = %f\n",max_time); op_exit(); MPI_Finalize(); //user mpi finalize }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (float *) malloc(2*g_nnode*sizeof(float)); g_q = (float *) malloc(4*g_ncell*sizeof(float)); g_qold = (float *) malloc(4*g_ncell*sizeof(float)); g_res = (float *) malloc(4*g_ncell*sizeof(float)); g_adt = (float *) malloc( g_ncell*sizeof(float)); for (int n=0; n<g_nnode; n++){ check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2); } for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1); scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2); scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2-wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", NULL, pecell, p_x); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); niter = 1000; for(int iter=1; iter<=niter; iter++) { //save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pecell,4,"float",OP_READ), op_arg_dat(p_q, 1,pecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"float",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pecell,4,"float",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"float",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"float",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ), op_arg_gbl(&rms,1,"float",OP_INC)); } //print iteration history rms = sqrt(rms/(float) g_ncell); if (iter%100 == 0) op_printf("%d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //get results data array - perhaps can be later handled by a remporary dat //op_dat temp = op_mpi_get_data(p_q); //output the result dat array to files //print_dat_tofile(temp, "out_grid.dat"); //ASCI //print_dat_tobinfile(temp, "out_grid.bin"); //Binary op_timing_output(); //print total time for niter interations op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc,char *argv[]) { int *becell; int *ecell; int *bound; int *bedge; int *edge; int *cell; float *x; float *q; float *qold; float *adt; float *res; int nnode; int ncell; int nedge; int nbedge; int niter; float rms; if (argc != 2) { printf("Usage: airfoil <grid>\n"); exit(1); } // read in grid printf("reading in grid \n"); char *grid = argv[1]; FILE *fp; if ((fp = fopen(grid,"r")) == 0L) { printf("can\'t open file %s\n",grid); exit((-1)); } if (fscanf(fp,"%d %d %d %d \n",&nnode,&ncell,&nedge,&nbedge) != 4) { printf("error reading from %s\n",grid); exit((-1)); } cell = ((int *)(malloc(((4 * ncell) * (sizeof(int )))))); edge = ((int *)(malloc(((2 * nedge) * (sizeof(int )))))); ecell = ((int *)(malloc(((2 * nedge) * (sizeof(int )))))); bedge = ((int *)(malloc(((2 * nbedge) * (sizeof(int )))))); becell = ((int *)(malloc((nbedge * (sizeof(int )))))); bound = ((int *)(malloc((nbedge * (sizeof(int )))))); x = ((float *)(malloc(((2 * nnode) * (sizeof(float )))))); q = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); qold = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); res = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); adt = ((float *)(malloc((ncell * (sizeof(float )))))); for (int n = 0; n < nnode; n++) { if (fscanf(fp,"%f %f \n",(x + (2 * n)),(x + ((2 * n) + 1))) != 2) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",(cell + (4 * n)),(cell + ((4 * n) + 1)),(cell + ((4 * n) + 2)),(cell + ((4 * n) + 3))) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",(edge + (2 * n)),(edge + ((2 * n) + 1)),(ecell + (2 * n)),(ecell + ((2 * n) + 1))) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",(bedge + (2 * n)),(bedge + ((2 * n) + 1)),(becell + n),(bound + n)) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = (gam - 1.0f); cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = ((3.0f * atan(1.0f)) / 45.0f); float p = 1.0f; float r = 1.0f; float u = (sqrt(((gam * p) / r)) * mach); float e = ((p / (r * gm1)) + ((0.5f * u) * u)); qinf[0] = r; qinf[1] = (r * u); qinf[2] = 0.0f; qinf[3] = (r * e); for (int n = 0; n < ncell; n++) { for (int m = 0; m < 4; m++) { q[(4 * n) + m] = qinf[m]; res[(4 * n) + m] = 0.0f; } } // OP initialisation op_init(argc,argv,2); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode,"nodes"); op_set edges = op_decl_set(nedge,"edges"); op_set bedges = op_decl_set(nbedge,"bedges"); op_set cells = op_decl_set(ncell,"cells"); op_map pedge = op_decl_map(edges,nodes,2,edge,"pedge"); op_map pecell = op_decl_map(edges,cells,2,ecell,"pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge,"pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells,nodes,4,cell,"pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int",bound,"p_bound"); op_dat p_x = op_decl_dat(nodes,2,"float",x,"p_x"); op_dat p_q = op_decl_dat(cells,4,"float",q,"p_q"); op_dat p_qold = op_decl_dat(cells,4,"float",qold,"p_qold"); op_dat p_adt = op_decl_dat(cells,1,"float",adt,"p_adt"); op_dat p_res = op_decl_dat(cells,4,"float",res,"p_res"); op_decl_const(1,"float",&gam); op_decl_const(1,"float",&gm1); op_decl_const(1,"float",&cfl); op_decl_const(1,"float",&eps); op_decl_const(1,"float",&mach); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf); op_diagnostic_output(); // main time-marching loop niter = 1000; for (int iter = 1; iter <= niter; iter++) { // save old flow solution save_soln_host("save_soln_modified",cells,op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_WRITE)); // predictor/corrector update loop for (int k = 0; k < 2; k++) { // calculate area/timstep adt_calc_host("adt_calc_modified",cells,op_arg_dat(p_x,0,pcell,2,"float",OP_READ),op_arg_dat(p_x,1,pcell,2,"float",OP_READ),op_arg_dat(p_x,2,pcell,2,"float",OP_READ),op_arg_dat(p_x,3,pcell,2,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_WRITE)); // calculate flux residual res_calc_host("res_calc_modified",edges,op_arg_dat(p_x,0,pedge,2,"float",OP_READ),op_arg_dat(p_x,1,pedge,2,"float",OP_READ),op_arg_dat(p_q,0,pecell,4,"float",OP_READ),op_arg_dat(p_q,1,pecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pecell,1,"float",OP_READ),op_arg_dat(p_adt,1,pecell,1,"float",OP_READ),op_arg_dat(p_res,0,pecell,4,"float",OP_INC),op_arg_dat(p_res,1,pecell,4,"float",OP_INC)); bres_calc_host("bres_calc_modified",bedges,op_arg_dat(p_x,0,pbedge,2,"float",OP_READ),op_arg_dat(p_x,1,pbedge,2,"float",OP_READ),op_arg_dat(p_q,0,pbecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pbecell,1,"float",OP_READ),op_arg_dat(p_res,0,pbecell,4,"float",OP_INC),op_arg_dat(p_bound,(-1), OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; update_host("update_modified",cells,op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_WRITE),op_arg_dat(p_res,(-1), OP_ID,4,"float",OP_RW),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_READ),op_arg_gbl(&rms,1,"float",OP_INC)); } // print iteration history rms = (sqrt((rms / ((float )ncell)))); if ((iter % 100) == 0) printf(" %d %10.5e \n",iter,rms); } /* for (int ll = 0; ll < (4 * ncell); ll++) printf("%lf\n",q[ll]);*/ op_timing_output(); return 0; }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *bnode, *cell, *g_bnode, *g_cell; double *xm, *g_xm;; int nnode,ncell,nbnodes,niter, g_nnode, g_ncell, g_nbnodes; double rms = 1; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("FE_grid.dat","r")) == NULL) { op_printf("can't open file FE_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d \n",&g_nnode, &g_ncell, &g_nbnodes) != 3) { op_printf("error reading from new_grid.dat\n"); exit(-1); } if (my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_bnode = (int *) malloc(g_nbnodes*sizeof(int)); g_xm = (double *) malloc(2*g_nnode*sizeof(double)); for (int n=0; n<g_nnode; n++) { if (fscanf(fp,"%lf %lf \n",&g_xm[2*n], &g_xm[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_nbnodes; n++) { if (fscanf(fp,"%d \n",&g_bnode[n]) != 1) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nbnodes = compute_local_size (g_nbnodes, comm_size, my_rank); cell = (int *) malloc(4*ncell*sizeof(int)); bnode = (int *) malloc(nbnodes*sizeof(int)); xm = (double *) malloc(2*nnode*sizeof(double)); scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes,nbnodes, 1); scatter_double_array(g_xm, xm, comm_size, g_nnode,nnode, 2); if(my_rank == MPI_ROOT) { free(g_cell); free(g_xm); free(g_bnode); } // set constants and initialise flow field and residual op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0/gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = -0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf*minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; double *phim = (double *)malloc(nnode*sizeof(double)); memset(phim,0,nnode*sizeof(double)); for (int i = 0;i<nnode;i++) { phim[i] = minf*xm[2*i]; } double *K = (double *)malloc(4*4*ncell*sizeof(double)); memset(K,0,4*4*ncell*sizeof(double)); double *resm = (double *)malloc(nnode*sizeof(double)); memset(resm,0,nnode*sizeof(double)); double *V = (double *)malloc(nnode*sizeof(double)); memset(V,0,nnode*sizeof(double)); double *P = (double *)malloc(nnode*sizeof(double)); memset(P,0,nnode*sizeof(double)); double *U = (double *)malloc(nnode*sizeof(double)); memset(U,0,nnode*sizeof(double)); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set bnodes = op_decl_set(nbnodes, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pbnodes = op_decl_map(bnodes,nodes,1,bnode, "pbedge"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_xm = op_decl_dat(nodes ,2,"double",xm ,"p_x"); op_dat p_phim = op_decl_dat(nodes, 1, "double", phim, "p_phim"); op_dat p_resm = op_decl_dat(nodes, 1, "double", resm, "p_resm"); op_dat p_K = op_decl_dat(cells, 16, "double:soa", K, "p_K"); op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V"); op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P"); op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&gm1i ); op_decl_const(1,"double",&m2 ); op_decl_const(2,"double",wtg1 ); op_decl_const(2,"double",xi1 ); op_decl_const(4,"double",Ng1 ); op_decl_const(4,"double",Ng1_xi ); op_decl_const(4,"double",wtg2 ); op_decl_const(16,"double",Ng2 ); op_decl_const(32,"double",Ng2_xi ); op_decl_const(1,"double",&minf ); op_decl_const(1,"double",&freq ); op_decl_const(1,"double",&kappa ); op_decl_const(1,"double",&nmode ); op_decl_const(1,"double",&mfan ); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL); // main time-marching loop niter = 20; //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); for(int iter=1; iter<=niter; iter++) { op_par_loop(res_calc,"res_calc",cells, op_arg_dat(p_xm, -4, pcell, 2,"double",OP_READ), op_arg_dat(p_phim, -4, pcell, 1,"double",OP_READ), op_arg_dat(p_K, -1, OP_ID, 16,"double:soa",OP_WRITE), op_arg_dat(p_resm, -4, pcell, 1,"double",OP_INC) ); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_resm, 0, pbnodes, 1,"double",OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; //c1 = R'*R; op_par_loop(init_cg, "init_cg", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c1, 1, "double", OP_INC), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE)); //set up stopping conditions double res0 = sqrt(c1); double res = res0; int inner_iter = 0; int maxiter = 200; while (res > 0.1*res0 && inner_iter < maxiter) { //V = Stiffness*P op_par_loop(spMV, "spMV", cells, op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC), op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ), op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ)); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_V, 0, pbnodes, 1,"double",OP_WRITE)); c2 = 0; //c2 = P'*V; op_par_loop(dotPV, "dotPV", nodes, op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c2, 1, "double", OP_INC)); alpha = c1/c2; //U = U + alpha*P; //resm = resm-alpha*V; op_par_loop(updateUR, "updateUR", nodes, op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&alpha, 1, "double", OP_READ)); c3 = 0; //c3 = resm'*resm; op_par_loop(dotR, "dotR", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c3, 1, "double", OP_INC)); beta = c3/c1; //P = beta*P+resm; op_par_loop(updateP, "updateP", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&beta, 1, "double", OP_READ)); c1 = c3; res = sqrt(c1); inner_iter++; } rms = 0; //phim = phim - Stiffness\Load; op_par_loop(update, "update", nodes, op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&rms, 1, "double", OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(g_nnode), inner_iter); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); /*free(cell); free(bnode); free(xm); free(phim); free(K); free(resm); free(V); free(P); free(U);*/ }
int main(int argc, char **argv){ int nnode, nedge, n, e; double dx; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); dx = 1.0f / ((double) NN); int *pp = (int *)malloc(sizeof(int)*2*nedge); double *A = (double *)malloc(sizeof(double)*nedge); double *r = (double *)malloc(sizeof(double)*nnode); double *u = (double *)malloc(sizeof(double)*nnode); double *du = (double *)malloc(sizeof(double)*nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[n] = 0.0f; u[n] = 0.0f; du[n] = 0.0f; pp[2*e] = n; pp[2*e+1] = n; A[e] = -1.0f; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[n] += 0.25f; } else { pp[2*e] = n; pp[2*e+1] = i2-1 + (j2-1)*(NN-1); A[e] = 0.25f; e++; } } } } // OP initialisation op_init(argc,argv,5); // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,1,"double",A, "p_A" ); op_dat p_r = op_decl_dat(nodes,1,"double",r, "p_r" ); op_dat p_u = op_decl_dat(nodes,1,"double",u, "p_u" ); op_dat p_du = op_decl_dat(nodes,1,"double",du, "p_du"); alpha = 1.0f; op_decl_const(1,"double",&alpha); op_diagnostic_output(); // main iteration loop double u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop(res,"res", edges, op_arg_dat(p_A, -1,OP_ID, 1,"double",OP_READ), op_arg_dat(p_u, 1,ppedge, 1,"double",OP_READ), op_arg_dat(p_du, 0,ppedge, 1,"double",OP_INC ), op_arg_gbl(&beta,1,"double",OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update,"update", nodes, op_arg_dat(p_r, -1,OP_ID, 1,"double",OP_READ), op_arg_dat(p_du, -1,OP_ID, 1,"double",OP_RW ), op_arg_dat(p_u, -1,OP_ID, 1,"double",OP_INC ), op_arg_gbl(&u_sum,1,"double",OP_INC), op_arg_gbl(&u_max,1,"double",OP_MAX)); printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } // print out results printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u); /* op_fetch_data(p_du); op_fetch_data(p_r); */ for (int pass=0; pass<1; pass++) { /* if(pass==0) printf("\narray u\n"); else if(pass==1) printf("\narray du\n"); else if(pass==2) printf("\narray r\n"); */ for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) printf(" %7.4f",u[i-1 + (j-1)*(NN-1)]); else if (pass==1) printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) printf(" %7.4f",r[i-1 + (j-1)*(NN-1)]); } printf("\n"); } printf("\n"); } op_timing_output(); op_exit(); // free allocated arrays free(pp); free(A); free(r); free(u); free(du); }
int main(int argc, char *argv[]){ int *becell, *ecell, *bound, *bedge, *edge, *cell; REAL *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; REAL rms; if (argc != 2) { printf("Usage: airfoil <grid>\n"); exit(1); } // read in grid printf("reading in grid \n"); char* grid = argv[1]; FILE *fp; if ( (fp = fopen(grid,"r")) == NULL) { printf("can't open file %s\n", grid); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from %s\n", grid); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (REAL *) malloc(2*nnode*sizeof(REAL)); q = (REAL *) malloc(4*ncell*sizeof(REAL)); qold = (REAL *) malloc(4*ncell*sizeof(REAL)); res = (REAL *) malloc(4*ncell*sizeof(REAL)); adt = (REAL *) malloc( ncell*sizeof(REAL)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; REAL mach = 0.4f; REAL alpha = 3.0f*atan(1.0f)/45.0f; REAL p = 1.0f; REAL r = 1.0f; REAL u = sqrt(gam*p/r)*mach; REAL e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation op_init(argc,argv,2); op_tuner* global_tuner = op_create_global_tuner(); global_tuner->op_warpsize = 1; global_tuner->block_size = 64; global_tuner->part_size = 128; global_tuner->cache_line_size = 128; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,REAL_STRING,x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,REAL_STRING,q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,REAL_STRING,qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,REAL_STRING,adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,REAL_STRING,res ,"p_res"); op_decl_const(1,REAL_STRING,&gam ); op_decl_const(1,REAL_STRING,&gm1 ); op_decl_const(1,REAL_STRING,&cfl ); op_decl_const(1,REAL_STRING,&eps ); op_decl_const(1,REAL_STRING,&mach ); op_decl_const(1,REAL_STRING,&alpha); op_decl_const(4,REAL_STRING,qinf ); op_tuner* save_soln_tuner = op_create_tuner("save_soln"); save_soln_tuner->part_size = 64; save_soln_tuner->block_size = 4; op_tuner* adt_calc_tuner = op_create_tuner("adt_calc"); adt_calc_tuner->part_size = 64; adt_calc_tuner->block_size = 4; op_tuner* res_calc_tuner = op_create_tuner("res_calc"); res_calc_tuner->part_size = 64; res_calc_tuner->block_size = 4; op_tuner* bres_calc_tuner = op_create_tuner("bres_calc"); bres_calc_tuner->part_size = 64; bres_calc_tuner->block_size = 4; op_tuner* update_tuner = op_create_tuner("update"); update_tuner->part_size = 64; update_tuner->block_size = 4; op_diagnostic_output(); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_WRITE), save_soln_tuner); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 1,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 2,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 3,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,REAL_STRING,OP_WRITE), adt_calc_tuner); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_x, 1,pedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_q, 0,pecell,4,REAL_STRING,OP_READ), op_arg_dat(p_q, 1,pecell,4,REAL_STRING,OP_READ), op_arg_dat(p_adt, 0,pecell,1,REAL_STRING,OP_READ), op_arg_dat(p_adt, 1,pecell,1,REAL_STRING,OP_READ), op_arg_dat(p_res, 0,pecell,4,REAL_STRING,OP_INC ), op_arg_dat(p_res, 1,pecell,4,REAL_STRING,OP_INC ), res_calc_tuner); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_x, 1,pbedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_q, 0,pbecell,4,REAL_STRING,OP_READ), op_arg_dat(p_adt, 0,pbecell,1,REAL_STRING,OP_READ), op_arg_dat(p_res, 0,pbecell,4,REAL_STRING,OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), bres_calc_tuner); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,REAL_STRING,OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,REAL_STRING,OP_READ ), op_arg_gbl(&rms,1,REAL_STRING,OP_INC), update_tuner); } // print iteration history rms = sqrt(rms/(REAL) ncell); if ( iter % 100 == 0 ) printf(" %d %10.5e \n",iter,rms); } for ( int ll = 0; ll < 4*ncell; ll++ ) { printf ( "%lf\n", q[ll] ); } op_timing_output(); }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; op_set nodes, edges, bedges, cells; op_map pedge, pecell, pbedge, pbecell, pcell; op_dat p_x, p_q, p_qold, p_res, p_adt, p_bound, p_rms; /* read in grid */ printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); /* set constants and initialise flow field and residual */ printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } /* OP initialisation */ op_init(argc,argv,2); /* declare sets, pointers, datasets and global constants */ op_decl_set(&nodes, nnode, "nodes"); op_decl_set(&edges, nedge, "edges"); op_decl_set(&bedges,nbedge, "bedges"); op_decl_set(&cells, ncell, "cells"); op_decl_map(&pedge, &edges, &nodes,2,edge, "pedge"); op_decl_map(&pecell, &edges, &cells,2,ecell, "pecell"); op_decl_map(&pbedge, &bedges,&nodes,2,bedge, "pbedge"); op_decl_map(&pbecell,&bedges,&cells,1,becell,"pbecell"); op_decl_map(&pcell, &cells, &nodes,4,cell, "pcell"); op_decl_vec(&p_bound,&bedges,1,sizeof(int),bound,"p_bound"); op_decl_vec(&p_x ,&nodes ,2,sizeof(float),x ,"p_x"); op_decl_vec(&p_q ,&cells ,4,sizeof(float),q ,"p_q"); op_decl_vec(&p_qold ,&cells ,4,sizeof(float),qold ,"p_qold"); op_decl_vec(&p_adt ,&cells ,1,sizeof(float),adt ,"p_adt"); op_decl_vec(&p_res ,&cells ,4,sizeof(float),res ,"p_res"); op_decl_gbl(&p_rms ,1,sizeof(float),&rms ,"p_rms"); op_decl_const(&gam, 1, sizeof(float)); op_decl_const(&gm1, 1, sizeof(float)); op_decl_const(&cfl, 1, sizeof(float)); op_decl_const(&eps, 1, sizeof(float)); op_decl_const(&mach, 1, sizeof(float)); op_decl_const(&alpha,1, sizeof(float)); op_decl_const(qinf, 4, sizeof(float)); op_diagnostic_output(); /* main time-marching loop */ niter = 1000; for(int iter=1; iter<=niter; iter++) { /* save old flow solution */ op_par_loop_2((void(*)(void*,void*))save_soln,"save_soln", &cells, op_construct_vec_arg(&p_q, OP_NONE, NULL,OP_READ ), op_construct_vec_arg(&p_qold,OP_NONE, NULL,OP_WRITE)); /* predictor/corrector update loop */ for(int k=0; k<2; k++) { /* calculate area/timstep */ op_par_loop_3((void(*)(void*,void*,void*))adt_calc,"adt_calc",&cells, op_construct_vec_arg(&p_x, OP_ALL, &pcell, OP_READ ), op_construct_vec_arg(&p_q, OP_NONE, NULL, OP_READ ), op_construct_vec_arg(&p_adt, OP_NONE, NULL, OP_WRITE)); /* calculate flux residual */ op_par_loop_4((void(*)(void*,void*,void*,void*))res_calc,"res_calc",&edges, op_construct_vec_arg(&p_x, OP_ALL,&pedge, OP_READ), op_construct_vec_arg(&p_q, OP_ALL,&pecell,OP_READ), op_construct_vec_arg(&p_adt, OP_ALL,&pecell,OP_READ), op_construct_vec_arg(&p_res, OP_ALL,&pecell,OP_INC)); op_par_loop_5((void(*)(void*,void*,void*,void*,void*))bres_calc,"bres_calc",&bedges, op_construct_vec_arg(&p_x, OP_ALL,&pbedge, OP_READ), op_construct_vec_arg(&p_q, 0,&pbecell,OP_READ), op_construct_vec_arg(&p_adt, 0,&pbecell,OP_READ), op_construct_vec_arg(&p_res, 0,&pbecell,OP_INC), op_construct_vec_arg(&p_bound,OP_NONE, NULL,OP_READ)); /* update flow field */ rms = 0.0; op_par_loop_5((void(*)(void*,void*,void*,void*,void*))update,"update",&cells, op_construct_vec_arg(&p_qold,OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_q, OP_NONE, NULL, OP_WRITE), op_construct_vec_arg(&p_res, OP_NONE, NULL, OP_RW), op_construct_vec_arg(&p_adt, OP_NONE, NULL, OP_READ), op_construct_gbl_arg(&p_rms, OP_INC)); } /* print iteration history */ rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_write_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, -4,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, -2,pedge, 2,"double",OP_READ), op_arg_dat(p_q, -2,pecell,4,"double",OP_READ), op_arg_dat(p_adt, -2,pecell,1,"double",OP_READ), op_arg_dat(p_res, -2,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, -2,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge; /**------------------------BEGIN I/O -------------------**/ char file[] = "new_grid.dat"; char file_out[] = "new_grid_out.h5"; /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen(file,"r")) == NULL) { op_printf("can't open file %s\n",file); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (float *) malloc(2*g_nnode*sizeof(float)); g_q = (float *) malloc(4*g_ncell*sizeof(float)); g_qold = (float *) malloc(4*g_ncell*sizeof(float)); g_res = (float *) malloc(4*g_ncell*sizeof(float)); g_adt = (float *) malloc( g_ncell*sizeof(float)); for (int n=0; n<g_nnode; n++){ check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2); } for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1); scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2); scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } /**------------------------END I/O -----------------------**/ /* FIXME: It's not clear to the compiler that sth. is going on behind the scenes here. Hence theses variables are reported as unused */ op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_dump_to_hdf5(file_out); op_write_const_hdf5("gam", 1,"float",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1", 1,"float",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl", 1,"float",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps", 1,"float",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach", 1,"float",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"float",(char *)&alpha,"new_grid_out.h5"); op_write_const_hdf5("qinf", 4,"float",(char *)qinf, "new_grid_out.h5"); //create halos - for sanity check op_halo_create(); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode, ncell, nedge, nbedge, niter; double rms; // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ((fp = fopen("./new_grid.dat", "r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp, "%d %d %d %d \n", &nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *)malloc(4 * ncell * sizeof(int)); edge = (int *)malloc(2 * nedge * sizeof(int)); ecell = (int *)malloc(2 * nedge * sizeof(int)); bedge = (int *)malloc(2 * nbedge * sizeof(int)); becell = (int *)malloc(nbedge * sizeof(int)); bound = (int *)malloc(nbedge * sizeof(int)); x = (double *)malloc(2 * nnode * sizeof(double)); q = (double *)malloc(4 * ncell * sizeof(double)); qold = (double *)malloc(4 * ncell * sizeof(double)); res = (double *)malloc(4 * ncell * sizeof(double)); adt = (double *)malloc(ncell * sizeof(double)); for (int n = 0; n < nnode; n++) { if (fscanf(fp, "%lf %lf \n", &x[2 * n], &x[2 * n + 1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n = 0; n < ncell; n++) { if (fscanf(fp, "%d %d %d %d \n", &cell[4 * n], &cell[4 * n + 1], &cell[4 * n + 2], &cell[4 * n + 3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n = 0; n < nedge; n++) { if (fscanf(fp, "%d %d %d %d \n", &edge[2 * n], &edge[2 * n + 1], &ecell[2 * n], &ecell[2 * n + 1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n = 0; n < nbedge; n++) { if (fscanf(fp, "%d %d %d %d \n", &bedge[2 * n], &bedge[2 * n + 1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f * atan(1.0f) / 45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam * p / r) * mach; double e = p / (r * gm1) + 0.5f * u * u; qinf[0] = r; qinf[1] = r * u; qinf[2] = 0.0f; qinf[3] = r * e; for (int n = 0; n < ncell; n++) { for (int m = 0; m < 4; m++) { q[4 * n + m] = qinf[m]; res[4 * n + m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes, 2, edge, "pedge"); op_map pecell = op_decl_map(edges, cells, 2, ecell, "pecell"); op_map pbedge = op_decl_map(bedges, nodes, 2, bedge, "pbedge"); op_map pbecell = op_decl_map(bedges, cells, 1, becell, "pbecell"); op_map pcell = op_decl_map(cells, nodes, 4, cell, "pcell"); op_dat p_bound = op_decl_dat(bedges, 1, "int", bound, "p_bound"); op_dat p_x = op_decl_dat(nodes, 2, "double", x, "p_x"); op_dat p_q = op_decl_dat(cells, 4, "double", q, "p_q"); // op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); // op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); // op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); // p_res, p_adt and p_qold now declared as a temp op_dats during // the execution of the time-marching loop op_decl_const(1, "double", &gam); op_decl_const(1, "double", &gm1); op_decl_const(1, "double", &cfl); op_decl_const(1, "double", &eps); op_decl_const(1, "double", &mach); op_decl_const(1, "double", &alpha); op_decl_const(4, "double", qinf); op_diagnostic_output(); double g_ncell = op_get_size(cells); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for (int iter = 1; iter <= niter; iter++) { double *tmp_elem = NULL; op_dat p_res = op_decl_dat_temp(cells, 4, "double", tmp_elem, "p_res"); op_dat p_adt = op_decl_dat_temp(cells, 1, "double", tmp_elem, "p_adt"); op_dat p_qold = op_decl_dat_temp(cells, 4, "double", qold, "p_qold"); // save old flow solution op_par_loop(save_soln, "save_soln", cells, op_arg_dat(p_q, -1, OP_ID, 4, "double", OP_READ), op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_WRITE)); // predictor/corrector update loop for (int k = 0; k < 2; k++) { // calculate area/timstep op_par_loop(adt_calc, "adt_calc", cells, op_arg_dat(p_x, 0, pcell, 2, "double", OP_READ), op_arg_dat(p_x, 1, pcell, 2, "double", OP_READ), op_arg_dat(p_x, 2, pcell, 2, "double", OP_READ), op_arg_dat(p_x, 3, pcell, 2, "double", OP_READ), op_arg_dat(p_q, -1, OP_ID, 4, "double", OP_READ), op_arg_dat(p_adt, -1, OP_ID, 1, "double", OP_WRITE)); // calculate flux residual op_par_loop(res_calc, "res_calc", edges, op_arg_dat(p_x, 0, pedge, 2, "double", OP_READ), op_arg_dat(p_x, 1, pedge, 2, "double", OP_READ), op_arg_dat(p_q, 0, pecell, 4, "double", OP_READ), op_arg_dat(p_q, 1, pecell, 4, "double", OP_READ), op_arg_dat(p_adt, 0, pecell, 1, "double", OP_READ), op_arg_dat(p_adt, 1, pecell, 1, "double", OP_READ), op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC), op_arg_dat(p_res, 1, pecell, 4, "double", OP_INC)); op_par_loop(bres_calc, "bres_calc", bedges, op_arg_dat(p_x, 0, pbedge, 2, "double", OP_READ), op_arg_dat(p_x, 1, pbedge, 2, "double", OP_READ), op_arg_dat(p_q, 0, pbecell, 4, "double", OP_READ), op_arg_dat(p_adt, 0, pbecell, 1, "double", OP_READ), op_arg_dat(p_res, 0, pbecell, 4, "double", OP_INC), op_arg_dat(p_bound, -1, OP_ID, 1, "int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update, "update", cells, op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_READ), op_arg_dat(p_q, -1, OP_ID, 4, "double", OP_WRITE), op_arg_dat(p_res, -1, OP_ID, 4, "double", OP_RW), op_arg_dat(p_adt, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&rms, 1, "double", OP_INC)); } // print iteration history rms = sqrt(rms / (double)g_ncell); if (iter % 100 == 0) op_printf(" %d %10.5e \n", iter, rms); if (iter % 1000 == 0 && g_ncell == 720000) { // defailt mesh -- for validation testing // op_printf(" %d %3.16f \n",iter,rms); double diff = fabs((100.0 * (rms / 0.0001060114637578)) - 100.0); op_printf("\n\nTest problem with %d cells is within %3.15E %% of the " "expected solution\n", 720000, diff); if (diff < 0.00001) { op_printf("This test is considered PASSED\n"); } else { op_printf("This test is considered FAILED\n"); } } if (op_free_dat_temp(p_res) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n", p_res->name); if (op_free_dat_temp(p_adt) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n", p_adt->name); if (op_free_dat_temp(p_qold) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n", p_qold->name); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n", wall_t2 - wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double) op_get_size(cells)); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //output the result dat array to files op_print_dat_to_txtfile(p_q, "out_grid_seq.dat"); //ASCI op_print_dat_to_binfile(p_q, "out_grid_seq.bin"); //Binary op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; char file[] = "new_grid.h5";//"new_grid-26mil.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }