int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,5); int nnode, nedge, n, e; float dx; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); dx = 1.0f / ((float) NN); int *pp = (int *)malloc(sizeof(int)*2*nedge); int *p1 = (int *)malloc(sizeof(int)*nedge); int *p2 = (int *)malloc(sizeof(int)*nedge); float *xe = (float *)malloc(sizeof(float)*2*nedge); float *xn = (float *)malloc(sizeof(float)*2*nnode); double *A = (double *)malloc(sizeof(double)*3*nedge); float *r = (float *)malloc(sizeof(float)*2*nnode); float *u = (float *)malloc(sizeof(float)*2*nnode); float *du = (float *)malloc(sizeof(float)*3*nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[2*n] = 0.0f; u[2*n] = 0.0f; du[3*n] = 0.0f; xn[2*n ] = i*dx; xn[2*n+1] = j*dx; p1[e] = n; p2[e] = n; pp[2*e] = p1[e]; pp[2*e+1] = p2[e]; A[3*e] = -1.0f; xe[2*e ] = i*dx; xe[2*e+1] = j*dx; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[2*n] += 0.25f; } else { p1[e] = n; p2[e] = i2-1 + (j2-1)*(NN-1); pp[2*e] = p1[e]; pp[2*e+1] = p2[e]; A[3*e] = 0.25f; xe[2*e ] = i*dx; xe[2*e+1] = j*dx; e++; } } } } // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,3,"double",A, "p_A" ); op_dat p_r = op_decl_dat(nodes,2,"float", r, "p_r" ); op_dat p_u = op_decl_dat(nodes,2,"float", u, "p_u" ); op_dat p_du = op_decl_dat(nodes,3,"float", du, "p_du"); alpha = 2.0f; op_decl_const2("alpha",1,"float",&alpha); alpha = 1.0f; op_decl_const2("alpha",1,"float",&alpha); op_diagnostic_output(); // main iteration loop float u_sum, u_max, beta = 1.0f; for (int iter=0; 
iter<NITER; iter++) { op_par_loop_res("res",edges, op_arg_dat(p_A,-1,OP_ID,3,"double",OP_READ), op_arg_dat(p_u,1,ppedge,2,"float",OP_READ), op_arg_dat(p_du,0,ppedge,3,"float",OP_INC), op_arg_gbl(&beta,1,"float",OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop_update("update",nodes, op_arg_dat(p_r,-1,OP_ID,2,"float",OP_READ), op_arg_dat(p_du,-1,OP_ID,3,"float",OP_RW), op_arg_dat(p_u,-1,OP_ID,2,"float",OP_INC), op_arg_gbl(&u_sum,1,"float",OP_INC), op_arg_gbl(&u_max,1,"float",OP_MAX)); op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } // print out results op_printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u); for (int pass=0; pass<1; pass++) { for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) op_printf(" %7.4f",u[2*(i-1 + (j-1)*(NN-1))]); else if (pass==1) op_printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) op_printf(" %7.4f",r[2*(i-1 + (j-1)*(NN-1))]); } op_printf("\n"); } op_printf("\n"); } op_timing_output(); op_exit(); free(pp); free(A); free(u); free(du); free(r); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); // MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode, ncell, nedge, nbedge, niter; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ((fp = fopen("new_grid.dat", "r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode, g_ncell, g_nedge, g_nbedge; check_scan( fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n", g_nnode, g_ncell, g_nedge, g_nbedge); if (my_rank == MPI_ROOT) { g_cell = (int *)malloc(4 * g_ncell * sizeof(int)); g_edge = (int *)malloc(2 * g_nedge * sizeof(int)); g_ecell = (int *)malloc(2 * g_nedge * sizeof(int)); g_bedge = (int *)malloc(2 * g_nbedge * sizeof(int)); g_becell = (int *)malloc(g_nbedge * sizeof(int)); g_bound = (int *)malloc(g_nbedge * sizeof(int)); g_x = (double *)malloc(2 * g_nnode * sizeof(double)); g_q = (double *)malloc(4 * g_ncell * sizeof(double)); g_qold = (double *)malloc(4 * g_ncell * sizeof(double)); g_res = (double *)malloc(4 * g_ncell * sizeof(double)); g_adt = (double *)malloc(g_ncell * sizeof(double)); for (int n = 0; n < g_nnode; n++) { check_scan(fscanf(fp, "%lf %lf \n", &g_x[2 * n], &g_x[2 * n + 1]), 2); } for (int n = 0; n < g_ncell; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n], &g_cell[4 * n + 1], &g_cell[4 * n + 2], &g_cell[4 * n + 3]), 4); } for (int n = 0; n < g_nedge; n++) { 
check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2 * n], &g_edge[2 * n + 1], &g_ecell[2 * n], &g_ecell[2 * n + 1]), 4); } for (int n = 0; n < g_nbedge; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2 * n], &g_bedge[2 * n + 1], &g_becell[n], &g_bound[n]), 4); } // initialise flow field and residual } fclose(fp); nnode = compute_local_size(g_nnode, comm_size, my_rank); ncell = compute_local_size(g_ncell, comm_size, my_rank); nedge = compute_local_size(g_nedge, comm_size, my_rank); nbedge = compute_local_size(g_nbedge, comm_size, my_rank); op_printf( "Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n", my_rank, nnode, ncell, nedge, nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *)malloc(4 * ncell * sizeof(int)); edge = (int *)malloc(2 * nedge * sizeof(int)); ecell = (int *)malloc(2 * nedge * sizeof(int)); bedge = (int *)malloc(2 * nbedge * sizeof(int)); becell = (int *)malloc(nbedge * sizeof(int)); bound = (int *)malloc(nbedge * sizeof(int)); x = (double *)malloc(2 * nnode * sizeof(double)); q = (double *)malloc(4 * ncell * sizeof(double)); qold = (double *)malloc(4 * ncell * sizeof(double)); res = (double *)malloc(4 * ncell * sizeof(double)); adt = (double *)malloc(ncell * sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge, nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge, nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge, nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge, nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode, nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell, ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell, ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell, ncell, 4); scatter_double_array(g_adt, adt, 
comm_size, g_ncell, ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if (my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2 - wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ op_set edges = op_decl_set(nedge, "edges"); op_set cells = op_decl_set(ncell, "cells"); op_map pecell = op_decl_map(edges, cells, 2, ecell, "pecell"); op_dat p_res = op_decl_dat(cells, 4, "double", res, "p_res"); int count; // trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", cells, pecell, NULL); op_diagnostic_output(); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // indirect reduction count = 0; op_par_loop_res_calc("res_calc", edges, op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC), op_arg_gbl(&count, 1, "int", OP_INC)); op_printf("number of edges:: %d should be: %d \n", count, g_nedge); if (count != g_nedge) op_printf("indirect reduction FAILED\n"); else op_printf("indirect reduction PASSED\n"); // direct reduction count = 0; op_par_loop_update("update", cells, op_arg_dat(p_res, -1, OP_ID, 4, "double", OP_RW), op_arg_gbl(&count, 1, "int", OP_INC)); op_printf("number of cells: %d should be: %d \n", count, g_ncell); if (count != g_ncell) op_printf("direct reduction FAILED\n"); else op_printf("direct reduction PASSED\n"); op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *bnode, *cell; double *xm;//, *q; int nnode,ncell,nbnodes,niter; double rms = 1; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0/gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = 
-0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf*minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; char file[] = "FE_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set bnodes = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pbnodes = op_decl_map_hdf5(bnodes,nodes,1,file, "pbedge"); op_map pcell = op_decl_map_hdf5(cells, nodes,4,file, "pcell"); op_dat p_xm = op_decl_dat_hdf5(nodes ,2,"double", file, "p_x"); op_dat p_phim = op_decl_dat_hdf5(nodes, 1, "double", file, "p_phim"); op_dat p_resm = op_decl_dat_hdf5(nodes, 1, "double", file, "p_resm"); op_dat p_K = op_decl_dat_hdf5(cells, 16, "double:soa",file, "p_K"); op_dat p_V = op_decl_dat_hdf5(nodes, 1, "double", file, "p_V"); op_dat p_P = op_decl_dat_hdf5(nodes, 1, "double", file, "p_P"); op_dat p_U = op_decl_dat_hdf5(nodes, 1, "double", file, "p_U"); op_decl_const2("gam",1,"double",&gam ); op_decl_const2("gm1",1,"double",&gm1 ); op_decl_const2("gm1i",1,"double",&gm1i ); op_decl_const2("m2",1,"double",&m2 ); op_decl_const2("wtg1",2,"double",wtg1 ); op_decl_const2("xi1",2,"double",xi1 ); op_decl_const2("Ng1",4,"double",Ng1 ); op_decl_const2("Ng1_xi",4,"double",Ng1_xi ); op_decl_const2("wtg2",4,"double",wtg2 ); op_decl_const2("Ng2",16,"double",Ng2 ); op_decl_const2("Ng2_xi",32,"double",Ng2_xi ); op_decl_const2("minf",1,"double",&minf ); op_decl_const2("freq",1,"double",&freq ); op_decl_const2("kappa",1,"double",&kappa ); op_decl_const2("nmode",1,"double",&nmode ); op_decl_const2("mfan",1,"double",&mfan ); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, p_xm); op_printf("nodes: %d cells: %d bnodes: %d\n", nodes->size, cells->size, bnodes->size); nnode = op_get_size(nodes); ncell = op_get_size(cells); nbnodes = op_get_size(bnodes); double cpu_t1, cpu_t2, wall_t1, wall_t2; op_timers(&cpu_t1, 
&wall_t1); // main time-marching loop niter = 20; for(int iter=1; iter<=niter; iter++) { op_par_loop_res_calc("res_calc",cells, op_arg_dat(p_xm,-4,pcell,2,"double",OP_READ), op_arg_dat(p_phim,-4,pcell,1,"double",OP_READ), op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_WRITE), op_arg_dat(p_resm,-4,pcell,1,"double",OP_INC)); op_par_loop_dirichlet("dirichlet",bnodes, op_arg_dat(p_resm,0,pbnodes,1,"double",OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; //c1 = R'*R; op_par_loop_init_cg("init_cg",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c1,1,"double",OP_INC), op_arg_dat(p_U,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_WRITE)); //set up stopping conditions double res0 = sqrt(c1); double res = res0; int iter = 0; int maxiter = 200; while (res > 0.1*res0 && iter < maxiter) { //V = Stiffness*P op_par_loop_spMV("spMV",cells, op_arg_dat(p_V,-4,pcell,1,"double",OP_INC), op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_READ), op_arg_dat(p_P,-4,pcell,1,"double",OP_READ)); op_par_loop_dirichlet("dirichlet",bnodes, op_arg_dat(p_V,0,pbnodes,1,"double",OP_WRITE)); c2 = 0; //c2 = P'*V; op_par_loop_dotPV("dotPV",nodes, op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c2,1,"double",OP_INC)); alpha = c1/c2; //U = U + alpha*P; //resm = resm-alpha*V; op_par_loop_updateUR("updateUR",nodes, op_arg_dat(p_U,-1,OP_ID,1,"double",OP_INC), op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_INC), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_RW), op_arg_gbl(&alpha,1,"double",OP_READ)); c3 = 0; //c3 = resm'*resm; op_par_loop_dotR("dotR",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c3,1,"double",OP_INC)); beta = c3/c1; //P = beta*P+resm; op_par_loop_updateP("updateP",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_RW), 
op_arg_gbl(&beta,1,"double",OP_READ)); c1 = c3; res = sqrt(c1); iter++; } rms = 0; //phim = phim - Stiffness\Load; op_par_loop_update("update",nodes, op_arg_dat(p_phim,-1,OP_ID,1,"double",OP_RW), op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_U,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(nnode), iter); } op_timing_output(); op_timers(&cpu_t2, &wall_t2); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (double *) malloc(2*g_nnode*sizeof(double)); g_q = (double *) malloc(4*g_ncell*sizeof(double)); g_qold = (double *) malloc(4*g_ncell*sizeof(double)); g_res = (double *) malloc(4*g_ncell*sizeof(double)); g_adt = (double *) malloc( g_ncell*sizeof(double)); for (int n=0; 
n<g_nnode; n++){ check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2); } for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, 
g_nbedge,nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode,nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_double_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2-wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); op_decl_const2("gam",1,"double",&gam); op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); //trigger partitioning and halo 
creation routines op_partition("PTSCOTCH", "KWAY", cells, pecell, p_x); //op_partition("PARMETIS", "KWAY", cells, pecell, p_x); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); niter = 1000; for(int iter=1; iter<=niter; iter++) { //save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } //print iteration history rms = sqrt(rms/(double) g_ncell); if (iter%100 == 0) op_printf("%d %10.5e \n",iter,rms); } op_timers(&cpu_t2, 
&wall_t2); //output the result dat array to files op_print_dat_to_txtfile(p_q, "out_grid_mpi.dat"); //ASCI op_print_dat_to_binfile(p_q, "out_grid_mpi.bin"); //Binary //write given op_dat's indicated segment of data to a memory block in the order it was originally //arranged (i.e. before partitioning and reordering) double* q_part = (double *)op_malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_idx(p_q, q_part, 0, op_get_size(cells)-1); free(q_part); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
/*
 * Verify the item count returned by a scanf-family call.
 *
 * items_received: the value returned by the scan call.
 * items_expected: the number of items that call should have converted.
 *
 * Returns normally when the two counts match; otherwise prints an error
 * message via op_printf and terminates the process with exit(-1).
 */
static void check_scan(int items_received, int items_expected) {
  if (items_received == items_expected)
    return;

  op_printf("error reading from new_grid.dat\n");
  exit(-1);
}
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p 
= 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); //op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); //op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); //op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); // p_res, p_adt and p_qold now declared as a temp op_dats during // the execution of the time-marching loop op_decl_const2("gam",1,"double",&gam); op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); double g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { double* tmp_elem = NULL; op_dat p_res = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res"); op_dat p_adt = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt"); op_dat p_qold = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold"); // save old 
flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell ); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); if (iter%1000 == 0 && g_ncell == 720000){ //defailt mesh -- for validation testing //op_printf(" %d %3.16f \n",iter,rms); double diff=fabs((100.0*(rms/0.0001060114637578))-100.0); op_printf("\n\nTest problem with %d cells is within %3.15E %% of the expected solution\n",720000, diff); if(diff < 
0.00001) { op_printf("This test is considered PASSED\n"); } else { op_printf("This test is considered FAILED\n"); } } if (op_free_dat_temp(p_res) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name); if (op_free_dat_temp(p_adt) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name); if (op_free_dat_temp(p_qold) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_map m_test = op_decl_map_hdf5(cells, nodes,4, file, "m_test"); if (m_test == NULL) printf("m_test not found\n"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_dat p_test = op_decl_dat_hdf5(cells ,4,"double",file,"p_test"); if (p_test == NULL) printf("p_test not found\n"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const2("gam",1,"double",&gam); 
op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_dump_to_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); //op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), 
op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //write given op_dat's indicated segment of data to a memory block in the order it was originally //arranged (i.e. before partitioning and reordering) double* q = (double *)op_malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_idx(p_q, q, 0, op_get_size(cells)-1); free(q); //write given op_dat's data to hdf5 file in the order it was originally arranged (i.e. before partitioning and reordering) op_fetch_data_hdf5_file(p_q, "file_name.h5"); //printf("Root process = %d\n",op_is_root()); //output the result dat array to files //op_dump_to_hdf5("new_grid_out.h5"); //writes data as it is held on each process (under MPI) //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *bnode, *cell, *g_bnode, *g_cell; double *xm, *g_xm;; int nnode,ncell,nbnodes,niter, g_nnode, g_ncell, g_nbnodes; double rms = 1; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("FE_grid.dat","r")) == NULL) { op_printf("can't open file FE_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d \n",&g_nnode, &g_ncell, &g_nbnodes) != 3) { op_printf("error reading from new_grid.dat\n"); exit(-1); } if (my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_bnode = (int *) malloc(g_nbnodes*sizeof(int)); g_xm = (double *) malloc(2*g_nnode*sizeof(double)); for (int n=0; n<g_nnode; n++) { if (fscanf(fp,"%lf %lf \n",&g_xm[2*n], &g_xm[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_nbnodes; n++) { if (fscanf(fp,"%d \n",&g_bnode[n]) != 1) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nbnodes = compute_local_size (g_nbnodes, comm_size, my_rank); cell = (int *) malloc(4*ncell*sizeof(int)); bnode = (int *) malloc(nbnodes*sizeof(int)); xm = (double *) malloc(2*nnode*sizeof(double)); scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes,nbnodes, 1); scatter_double_array(g_xm, xm, comm_size, g_nnode,nnode, 2); if(my_rank == MPI_ROOT) { free(g_cell); free(g_xm); free(g_bnode); } // set constants and initialise flow field and residual 
op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0/gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = -0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf*minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; double *phim = (double 
*)malloc(nnode*sizeof(double)); memset(phim,0,nnode*sizeof(double)); for (int i = 0;i<nnode;i++) { phim[i] = minf*xm[2*i]; } double *K = (double *)malloc(4*4*ncell*sizeof(double)); memset(K,0,4*4*ncell*sizeof(double)); double *resm = (double *)malloc(nnode*sizeof(double)); memset(resm,0,nnode*sizeof(double)); double *V = (double *)malloc(nnode*sizeof(double)); memset(V,0,nnode*sizeof(double)); double *P = (double *)malloc(nnode*sizeof(double)); memset(P,0,nnode*sizeof(double)); double *U = (double *)malloc(nnode*sizeof(double)); memset(U,0,nnode*sizeof(double)); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set bnodes = op_decl_set(nbnodes, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pbnodes = op_decl_map(bnodes,nodes,1,bnode, "pbedge"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_xm = op_decl_dat(nodes ,2,"double",xm ,"p_x"); op_dat p_phim = op_decl_dat(nodes, 1, "double", phim, "p_phim"); op_dat p_resm = op_decl_dat(nodes, 1, "double", resm, "p_resm"); op_dat p_K = op_decl_dat(cells, 16, "double:soa", K, "p_K"); op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V"); op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P"); op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&gm1i ); op_decl_const(1,"double",&m2 ); op_decl_const(2,"double",wtg1 ); op_decl_const(2,"double",xi1 ); op_decl_const(4,"double",Ng1 ); op_decl_const(4,"double",Ng1_xi ); op_decl_const(4,"double",wtg2 ); op_decl_const(16,"double",Ng2 ); op_decl_const(32,"double",Ng2_xi ); op_decl_const(1,"double",&minf ); op_decl_const(1,"double",&freq ); op_decl_const(1,"double",&kappa ); op_decl_const(1,"double",&nmode ); op_decl_const(1,"double",&mfan ); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL); // main time-marching loop niter = 20; //initialise timers for total execution wall 
time op_timers(&cpu_t1, &wall_t1); for(int iter=1; iter<=niter; iter++) { op_par_loop(res_calc,"res_calc",cells, op_arg_dat(p_xm, -4, pcell, 2,"double",OP_READ), op_arg_dat(p_phim, -4, pcell, 1,"double",OP_READ), op_arg_dat(p_K, -1, OP_ID, 16,"double:soa",OP_WRITE), op_arg_dat(p_resm, -4, pcell, 1,"double",OP_INC) ); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_resm, 0, pbnodes, 1,"double",OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; //c1 = R'*R; op_par_loop(init_cg, "init_cg", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c1, 1, "double", OP_INC), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE)); //set up stopping conditions double res0 = sqrt(c1); double res = res0; int inner_iter = 0; int maxiter = 200; while (res > 0.1*res0 && inner_iter < maxiter) { //V = Stiffness*P op_par_loop(spMV, "spMV", cells, op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC), op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ), op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ)); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_V, 0, pbnodes, 1,"double",OP_WRITE)); c2 = 0; //c2 = P'*V; op_par_loop(dotPV, "dotPV", nodes, op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c2, 1, "double", OP_INC)); alpha = c1/c2; //U = U + alpha*P; //resm = resm-alpha*V; op_par_loop(updateUR, "updateUR", nodes, op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&alpha, 1, "double", OP_READ)); c3 = 0; //c3 = resm'*resm; op_par_loop(dotR, "dotR", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c3, 1, "double", OP_INC)); beta = c3/c1; //P = beta*P+resm; op_par_loop(updateP, 
"updateP", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&beta, 1, "double", OP_READ)); c1 = c3; res = sqrt(c1); inner_iter++; } rms = 0; //phim = phim - Stiffness\Load; op_par_loop(update, "update", nodes, op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&rms, 1, "double", OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(g_nnode), inner_iter); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); /*free(cell); free(bnode); free(xm); free(phim); free(K); free(resm); free(V); free(P); free(U);*/ }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 
1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const2("gam",1,"float",&gam); op_decl_const2("gm1",1,"float",&gm1); op_decl_const2("cfl",1,"float",&cfl); op_decl_const2("eps",1,"float",&eps); op_decl_const2("mach",1,"float",&mach); op_decl_const2("alpha",1,"float",&alpha); op_decl_const2("qinf",4,"float",qinf); op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"float",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"float",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"float",OP_READ), op_arg_dat(p_x,1,pcell,2,"float",OP_READ), op_arg_dat(p_x,2,pcell,2,"float",OP_READ), 
op_arg_dat(p_x,3,pcell,2,"float",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"float",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"float",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"float",OP_READ), op_arg_dat(p_x,1,pedge,2,"float",OP_READ), op_arg_dat(p_q,0,pecell,4,"float",OP_READ), op_arg_dat(p_q,1,pecell,4,"float",OP_READ), op_arg_dat(p_adt,0,pecell,1,"float",OP_READ), op_arg_dat(p_adt,1,pecell,1,"float",OP_READ), op_arg_dat(p_res,0,pecell,4,"float",OP_INC), op_arg_dat(p_res,1,pecell,4,"float",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"float",OP_READ), op_arg_dat(p_x,1,pbedge,2,"float",OP_READ), op_arg_dat(p_q,0,pbecell,4,"float",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"float",OP_READ), op_arg_dat(p_res,0,pbecell,4,"float",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"float",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"float",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"float",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"float",OP_READ), op_arg_gbl(&rms,1,"float",OP_INC)); } // print iteration history rms = sqrt(rms/(float) op_get_size(cells)); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //write back 
original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_write_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, -4,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, -2,pedge, 2,"double",OP_READ), op_arg_dat(p_q, -2,pecell,4,"double",OP_READ), op_arg_dat(p_adt, -2,pecell,1,"double",OP_READ), op_arg_dat(p_res, -2,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, -2,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; 
op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); // MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *pp; float *A, *r, *u, *du; int nnode, nedge; /**------------------------BEGIN I/O and PARTITIONING ---------------------**/ int g_nnode, g_nedge, g_n, g_e; g_nnode = (NN - 1) * (NN - 1); g_nedge = (NN - 1) * (NN - 1) + 4 * (NN - 1) * (NN - 2); int *g_pp = 0; float *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0; op_printf("Global number of nodes, edges = %d, %d\n", g_nnode, g_nedge); if (my_rank == MPI_ROOT) { g_pp = (int *)malloc(sizeof(int) * 2 * g_nedge); g_A = (float *)malloc(sizeof(float) * g_nedge); g_r = (float *)malloc(sizeof(float) * g_nnode); g_u = (float *)malloc(sizeof(float) * g_nnode); g_du = (float *)malloc(sizeof(float) * g_nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / // partitioning g_e = 0; for (int i = 1; i < NN; i++) { for (int j = 1; j < NN; j++) { g_n = i - 1 + (j - 1) * (NN - 1); g_r[g_n] = 0.0f; g_u[g_n] = 0.0f; g_du[g_n] = 0.0f; g_pp[2 * g_e] = g_n; g_pp[2 * g_e + 1] = g_n; g_A[g_e] = -1.0f; g_e++; for (int pass = 0; pass < 4; pass++) { int i2 = i; int j2 = j; if (pass == 0) i2 += -1; if (pass == 1) i2 += 1; if (pass == 2) j2 += -1; if (pass == 3) j2 += 1; if ((i2 == 0) || (i2 == NN) || (j2 == 0) || (j2 == NN)) { g_r[g_n] += 0.25f; } else { g_pp[2 * g_e] = g_n; g_pp[2 * g_e + 1] = i2 - 1 + (j2 - 1) * (NN - 1); g_A[g_e] = 0.25f; g_e++; } } } } } /* Compute local sizes */ nnode = compute_local_size(g_nnode, comm_size, my_rank); nedge = compute_local_size(g_nedge, comm_size, my_rank); op_printf("Number of nodes, edges on process %d = %d, %d\n", my_rank, nnode, nedge); /*Allocate memory to hold local sets, mapping tables and data*/ pp = (int *)malloc(2 * sizeof(int) * nedge); A = (float *)malloc(nedge * sizeof(float)); r = (float *)malloc(nnode * 
sizeof(float)); u = (float *)malloc(nnode * sizeof(float)); du = (float *)malloc(nnode * sizeof(float)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_pp, pp, comm_size, g_nedge, nedge, 2); scatter_float_array(g_A, A, comm_size, g_nedge, nedge, 1); scatter_float_array(g_r, r, comm_size, g_nnode, nnode, 1); scatter_float_array(g_u, u, comm_size, g_nnode, nnode, 1); scatter_float_array(g_du, du, comm_size, g_nnode, nnode, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if (my_rank == MPI_ROOT) { free(g_pp); free(g_A); free(g_r); free(g_u); free(g_du); } /**------------------------END I/O and PARTITIONING ---------------------**/ // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges, nodes, 2, pp, "ppedge"); op_dat p_A = op_decl_dat(edges, 1, "float", A, "p_A"); op_dat p_r = op_decl_dat(nodes, 1, "float", r, "p_r"); op_dat p_u = op_decl_dat(nodes, 1, "float", u, "p_u"); op_dat p_du = op_decl_dat(nodes, 1, "float", du, "p_du"); alpha = 1.0f; op_decl_const(1, "float", &alpha); op_diagnostic_output(); // trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main iteration loop float u_sum, u_max, beta = 1.0f; for (int iter = 0; iter < NITER; iter++) { op_par_loop(res, "res", edges, op_arg_dat(p_A, -1, OP_ID, 1, "float", OP_READ), op_arg_dat(p_u, 1, ppedge, 1, "float", OP_READ), op_arg_dat(p_du, 0, ppedge, 1, "float", OP_INC), op_arg_gbl(&beta, 1, "float", OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update, "update", nodes, op_arg_dat(p_r, -1, OP_ID, 1, "float", OP_READ), op_arg_dat(p_du, -1, OP_ID, 1, "float", OP_RW), op_arg_dat(p_u, -1, OP_ID, 1, "float", OP_INC), op_arg_gbl(&u_sum, 1, "float", OP_INC), op_arg_gbl(&u_max, 1, "float", OP_MAX)); op_printf("\n u 
max/rms = %f %f \n\n", u_max, sqrt(u_sum / g_nnode)); } op_timers(&cpu_t2, &wall_t2); // get results data array op_fetch_data(p_u, u); // output the result dat array to files op_print_dat_to_txtfile(p_u, "out_grid_mpi.dat"); // ASCI op_print_dat_to_binfile(p_u, "out_grid_mpi.bin"); // Binary printf("solution on rank %d\n", my_rank); for (int i = 0; i < nnode; i++) { printf(" %7.4f", u[i]); fflush(stdout); } printf("\n"); // print each mpi process's timing info for each kernel op_timing_output(); // print total time for niter interations op_printf("Max total runtime = %f\n", wall_t2 - wall_t1); // gather results from all ranks and check float *ug = (float *)malloc(sizeof(float) * op_get_size(nodes)); op_fetch_data_idx(p_u, ug, 0, op_get_size(nodes) - 1); int result = check_result<float>(ug, NN, TOLERANCE); free(ug); op_exit(); free(u); free(pp); free(A); free(r); free(du); return result; }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge; /**------------------------BEGIN I/O -------------------**/ char file[] = "new_grid.dat"; char file_out[] = "new_grid_out.h5"; /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen(file,"r")) == NULL) { op_printf("can't open file %s\n",file); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (float *) malloc(2*g_nnode*sizeof(float)); g_q = (float *) malloc(4*g_ncell*sizeof(float)); g_qold = (float *) malloc(4*g_ncell*sizeof(float)); g_res = (float *) malloc(4*g_ncell*sizeof(float)); g_adt = (float *) malloc( g_ncell*sizeof(float)); for (int n=0; n<g_nnode; n++){ check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2); } 
for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1); scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2); 
scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } /**------------------------END I/O -----------------------**/ /* FIXME: It's not clear to the compiler that sth. is going on behind the scenes here. Hence theses variables are reported as unused */ op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_dump_to_hdf5(file_out); op_write_const_hdf5("gam", 1,"float",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1", 1,"float",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl", 1,"float",(char *)&cfl, 
"new_grid_out.h5"); op_write_const_hdf5("eps", 1,"float",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach", 1,"float",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"float",(char *)&alpha,"new_grid_out.h5"); op_write_const_hdf5("qinf", 4,"float",(char *)qinf, "new_grid_out.h5"); //create halos - for sanity check op_halo_create(); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); int niter; float rms; // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells, 2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges, nodes, 2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges, cells, 1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes, 4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges, 1, "int", file, "p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes, 2, "float", file, "p_x"); op_dat p_q = op_decl_dat_hdf5(cells, 4, "float", file, "p_q"); op_dat p_qold = op_decl_dat_hdf5(cells, 4, "float", file, "p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells, 1, "float", file, "p_adt"); op_dat p_res = op_decl_dat_hdf5(cells, 4, "float", file, "p_res"); op_get_const_hdf5("gam", 1, "float", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "float", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "float", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "float", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "float", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "float", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "float", (char *)&qinf, "new_grid.h5"); op_decl_const2("gam", 1, "float", &gam); op_decl_const2("gm1", 1, "float", &gm1); op_decl_const2("cfl", 1, "float", &cfl); op_decl_const2("eps", 1, "float", &eps); op_decl_const2("mach", 1, "float", &mach); op_decl_const2("alpha", 1, "float", &alpha); 
op_decl_const2("qinf", 4, "float", qinf); if (op_is_root()) op_diagnostic_output(); // trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); // op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for (int iter = 1; iter <= niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln", cells, op_arg_dat(p_q, -1, OP_ID, 4, "float", OP_READ), op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_WRITE)); // predictor/corrector update loop for (int k = 0; k < 2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc", cells, op_arg_dat(p_x, 0, pcell, 2, "float", OP_READ), op_arg_dat(p_x, 1, pcell, 2, "float", OP_READ), op_arg_dat(p_x, 2, pcell, 2, "float", OP_READ), op_arg_dat(p_x, 3, pcell, 2, "float", OP_READ), op_arg_dat(p_q, -1, OP_ID, 4, "float", OP_READ), op_arg_dat(p_adt, -1, OP_ID, 1, "float", OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc", edges, op_arg_dat(p_x, 0, pedge, 2, "float", OP_READ), op_arg_dat(p_x, 1, pedge, 2, "float", OP_READ), op_arg_dat(p_q, 0, pecell, 4, "float", OP_READ), op_arg_dat(p_q, 1, pecell, 4, "float", OP_READ), op_arg_dat(p_adt, 0, pecell, 1, "float", OP_READ), op_arg_dat(p_adt, 1, pecell, 1, "float", OP_READ), op_arg_dat(p_res, 0, pecell, 4, "float", OP_INC), op_arg_dat(p_res, 1, pecell, 4, "float", OP_INC)); op_par_loop_bres_calc("bres_calc", bedges, op_arg_dat(p_x, 0, pbedge, 2, "float", OP_READ), op_arg_dat(p_x, 1, pbedge, 2, "float", OP_READ), op_arg_dat(p_q, 0, pbecell, 4, "float", OP_READ), op_arg_dat(p_adt, 0, pbecell, 1, "float", OP_READ), op_arg_dat(p_res, 0, pbecell, 4, "float", OP_INC), op_arg_dat(p_bound, -1, OP_ID, 1, "int", OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update", cells, op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_READ), op_arg_dat(p_q, -1, OP_ID, 
4, "float", OP_WRITE), op_arg_dat(p_res, -1, OP_ID, 4, "float", OP_RW), op_arg_dat(p_adt, -1, OP_ID, 1, "float", OP_READ), op_arg_gbl(&rms, 1, "float", OP_INC)); } // print iteration history rms = sqrtf(rms / (float)g_ncell); if (iter % 100 == 0) op_printf(" %d %10.5e \n", iter, rms); if (iter % 1000 == 0 && g_ncell == 720000) { // defailt mesh -- for validation testing op_printf(" %d %3.16f \n", iter, rms); float diff = fabsf((100.0 * (rms / 0.000105987)) - 100.0); op_printf("\n\nTest problem with %d cells is within %3.15E %% of the " "expected solution\n", 720000, diff); if (diff < 0.1) { op_printf("This test is considered PASSED\n"); } else { op_printf("This test is considered FAILED\n"); } } } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n", wall_t2 - wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 5); // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int nnode, nedge, n, e; nnode = (NN - 1) * (NN - 1); nedge = (NN - 1) * (NN - 1) + 4 * (NN - 1) * (NN - 2); int *pp = (int *)malloc(sizeof(int) * 2 * nedge); double *A = (double *)malloc(sizeof(double) * nedge); double *r = (double *)malloc(sizeof(double) * nnode); double *u = (double *)malloc(sizeof(double) * nnode); double *du = (double *)malloc(sizeof(double) * nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / // partitioning e = 0; for (int i = 1; i < NN; i++) { for (int j = 1; j < NN; j++) { n = i - 1 + (j - 1) * (NN - 1); r[n] = 0.0f; u[n] = 0.0f; du[n] = 0.0f; pp[2 * e] = n; pp[2 * e + 1] = n; A[e] = -1.0f; e++; for (int pass = 0; pass < 4; pass++) { int i2 = i; int j2 = j; if (pass == 0) i2 += -1; if (pass == 1) i2 += 1; if (pass == 2) j2 += -1; if (pass == 3) j2 += 1; if ((i2 == 0) || (i2 == NN) || (j2 == 0) || (j2 == NN)) { r[n] += 0.25f; } else { pp[2 * e] = n; pp[2 * e + 1] = i2 - 1 + (j2 - 1) * (NN - 1); A[e] = 0.25f; e++; } } } } // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges, nodes, 2, pp, "ppedge"); op_dat p_A = op_decl_dat(edges, 1, "double", A, "p_A"); op_dat p_r = op_decl_dat(nodes, 1, "double", r, "p_r"); op_dat p_u = op_decl_dat(nodes, 1, "double", u, "p_u"); op_dat p_du = op_decl_dat(nodes, 1, "double", du, "p_du"); alpha = 1.0f; op_decl_const2("alpha", 1, "double", &alpha); op_diagnostic_output(); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main iteration loop double u_sum, u_max, beta = 1.0f; for (int iter = 0; iter < NITER; iter++) { op_par_loop_res("res", edges, op_arg_dat(p_A, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_u, 1, ppedge, 1, "double", OP_READ), op_arg_dat(p_du, 0, ppedge, 1, "double", OP_INC), op_arg_gbl(&beta, 1, 
"double", OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop_update("update", nodes, op_arg_dat(p_r, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_du, -1, OP_ID, 1, "double", OP_RW), op_arg_dat(p_u, -1, OP_ID, 1, "double", OP_INC), op_arg_gbl(&u_sum, 1, "double", OP_INC), op_arg_gbl(&u_max, 1, "double", OP_MAX)); op_printf("\n u max/rms = %f %f \n\n", u_max, sqrt(u_sum / nnode)); } op_timers(&cpu_t2, &wall_t2); // print out results op_printf("\n Results after %d iterations:\n\n", NITER); op_fetch_data(p_u, u); for (int pass = 0; pass < 1; pass++) { for (int j = NN - 1; j > 0; j--) { for (int i = 1; i < NN; i++) { if (pass == 0) op_printf(" %7.4f", u[i - 1 + (j - 1) * (NN - 1)]); else if (pass == 1) op_printf(" %7.4f", du[i - 1 + (j - 1) * (NN - 1)]); else if (pass == 2) op_printf(" %7.4f", r[i - 1 + (j - 1) * (NN - 1)]); } op_printf("\n"); } op_printf("\n"); } op_timing_output(); // print total time for niter interations op_printf("Max total runtime = %f\n", wall_t2 - wall_t1); int result = check_result<double>(u, NN, TOLERANCE); op_exit(); free(pp); free(A); free(u); free(du); free(r); return result; }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; char file[] = "new_grid.h5";//"new_grid-26mil.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old 
flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in airfoil grid op_printf("reading in data \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // declare sets, pointers, datasets op_set edges = op_decl_set(nedge, "edges"); op_set cells = op_decl_set(ncell, "cells"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_dat p_res = op_decl_dat(cells 
,4,"double",res ,"p_res"); int count; op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); //indirect reduction count = 0; op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_gbl(&count,1,"int",OP_INC)); op_printf("number of edges:: %d should be: %d \n",count,nedge); if (count != nedge) op_printf("indirect reduction FAILED\n"); else op_printf("indirect reduction PASSED\n"); //direct reduction count = 0; op_par_loop_update("update",cells, op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_gbl(&count,1,"int",OP_INC)); op_printf("number of cells: %d should be: %d \n",count,ncell); if (count != ncell) op_printf("direct reduction FAILED\n"); else op_printf("direct reduction PASSED\n"); op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }