//extern "C" op_set op_decl_set_f ( int size, char const * name ) { char * heapName = (char *) calloc ( strlen ( name ), sizeof ( char ) ); strncpy ( heapName, name, strlen ( name ) ); return op_decl_set ( size, heapName ); }
op_set op_decl_set_hdf5(char const *file, char const *name) { //create new communicator int my_rank, comm_size; MPI_Comm_dup(MPI_COMM_WORLD, &OP_MPI_HDF5_WORLD); MPI_Comm_rank(OP_MPI_HDF5_WORLD, &my_rank); MPI_Comm_size(OP_MPI_HDF5_WORLD, &comm_size); //MPI variables MPI_Info info = MPI_INFO_NULL; //HDF5 APIs definitions hid_t file_id; //file identifier hid_t plist_id; //property list identifier hid_t dset_id; //dataset identifier //Set up file access property list with parallel I/O access plist_id = H5Pcreate(H5P_FILE_ACCESS); H5Pset_fapl_mpio(plist_id, OP_MPI_HDF5_WORLD, info); file_id = H5Fopen(file, H5F_ACC_RDONLY, plist_id ); H5Pclose(plist_id); //Create the dataset with default properties and close dataspace. dset_id = H5Dopen(file_id, name, H5P_DEFAULT); //Create property list for collective dataset write. plist_id = H5Pcreate(H5P_DATASET_XFER); H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); int g_size = 0; //read data H5Dread(dset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, plist_id, &g_size); H5Pclose(plist_id); H5Dclose(dset_id); H5Fclose(file_id); //calculate local size of set for this mpi process int l_size = compute_local_size (g_size, comm_size, my_rank); MPI_Comm_free(&OP_MPI_HDF5_WORLD); return op_decl_set(l_size, name); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); // MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode, ncell, nedge, nbedge, niter; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ((fp = fopen("new_grid.dat", "r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode, g_ncell, g_nedge, g_nbedge; check_scan( fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n", g_nnode, g_ncell, g_nedge, g_nbedge); if (my_rank == MPI_ROOT) { g_cell = (int *)malloc(4 * g_ncell * sizeof(int)); g_edge = (int *)malloc(2 * g_nedge * sizeof(int)); g_ecell = (int *)malloc(2 * g_nedge * sizeof(int)); g_bedge = (int *)malloc(2 * g_nbedge * sizeof(int)); g_becell = (int *)malloc(g_nbedge * sizeof(int)); g_bound = (int *)malloc(g_nbedge * sizeof(int)); g_x = (double *)malloc(2 * g_nnode * sizeof(double)); g_q = (double *)malloc(4 * g_ncell * sizeof(double)); g_qold = (double *)malloc(4 * g_ncell * sizeof(double)); g_res = (double *)malloc(4 * g_ncell * sizeof(double)); g_adt = (double *)malloc(g_ncell * sizeof(double)); for (int n = 0; n < g_nnode; n++) { check_scan(fscanf(fp, "%lf %lf \n", &g_x[2 * n], &g_x[2 * n + 1]), 2); } for (int n = 0; n < g_ncell; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n], &g_cell[4 * n + 1], &g_cell[4 * n + 2], &g_cell[4 * n + 3]), 4); } for (int n = 0; n < g_nedge; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2 * n], &g_edge[2 * n + 1], &g_ecell[2 * n], &g_ecell[2 * n + 1]), 4); } for (int n = 0; n < g_nbedge; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2 * n], &g_bedge[2 * n + 1], &g_becell[n], &g_bound[n]), 4); } // initialise flow field and residual } fclose(fp); nnode = compute_local_size(g_nnode, comm_size, my_rank); ncell = compute_local_size(g_ncell, comm_size, my_rank); nedge = compute_local_size(g_nedge, comm_size, my_rank); nbedge = compute_local_size(g_nbedge, comm_size, my_rank); op_printf( "Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n", my_rank, nnode, ncell, nedge, nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *)malloc(4 * ncell * sizeof(int)); edge = (int *)malloc(2 * nedge * sizeof(int)); ecell = (int *)malloc(2 * nedge * sizeof(int)); bedge = (int *)malloc(2 * nbedge * sizeof(int)); becell = (int *)malloc(nbedge * sizeof(int)); bound = (int *)malloc(nbedge * sizeof(int)); x = (double *)malloc(2 * nnode * sizeof(double)); q = (double *)malloc(4 * ncell * sizeof(double)); qold = (double *)malloc(4 * ncell * sizeof(double)); res = (double *)malloc(4 * ncell * sizeof(double)); adt = (double *)malloc(ncell * sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge, nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge, nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge, nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge, nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode, nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell, ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell, ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell, ncell, 4); scatter_double_array(g_adt, adt, comm_size, g_ncell, ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if (my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2 - wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ op_set edges = op_decl_set(nedge, "edges"); op_set cells = op_decl_set(ncell, "cells"); op_map pecell = op_decl_map(edges, cells, 2, ecell, "pecell"); op_dat p_res = op_decl_dat(cells, 4, "double", res, "p_res"); int count; // trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", cells, pecell, NULL); op_diagnostic_output(); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // indirect reduction count = 0; op_par_loop_res_calc("res_calc", edges, op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC), op_arg_gbl(&count, 1, "int", OP_INC)); op_printf("number of edges:: %d should be: %d \n", count, g_nedge); if (count != g_nedge) op_printf("indirect reduction FAILED\n"); else op_printf("indirect reduction PASSED\n"); // direct reduction count = 0; op_par_loop_update("update", cells, op_arg_dat(p_res, -1, OP_ID, 4, "double", OP_RW), op_arg_gbl(&count, 1, "int", OP_INC)); op_printf("number of cells: %d should be: %d \n", count, g_ncell); if (count != g_ncell) op_printf("direct reduction FAILED\n"); else op_printf("direct reduction PASSED\n"); op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv){ int nnode, nedge, n, e; float dx; op_set nodes, edges; op_map ppedge; op_dat p_A, p_r, p_u, p_du, p_beta, p_u_sum, p_u_max; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); dx = 1.0f / ((float) NN); int *pp = (int *)malloc(sizeof(int)*2*nedge); float *A = (float *)malloc(sizeof(float)*nedge); float *r = (float *)malloc(sizeof(float)*nnode); float *u = (float *)malloc(sizeof(float)*nnode); float *du = (float *)malloc(sizeof(float)*nnode); /* create matrix and r.h.s., and set coordinates needed for renumbering / partitioning */ e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[n] = 0.0f; u[n] = 0.0f; du[n] = 0.0f; pp[2*e] = n; pp[2*e+1] = n; A[e] = -1.0f; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[n] += 0.25f; } else { pp[2*e] = n; pp[2*e+1] = i2-1 + (j2-1)*(NN-1); A[e] = 0.25f; e++; } } } } float u_sum, u_max, beta = 1.0f; /* OP initialisation */ op_init(argc,argv,5); /* declare sets, pointers, and datasets */ op_decl_set(&nodes,nnode, "nodes"); op_decl_set(&edges,nedge, "edges"); op_decl_map(&ppedge,&edges,&nodes,2,pp, "ppedge"); op_decl_vec(&p_A, &edges,1,sizeof(float), A, "p_A" ); op_decl_vec(&p_r, &nodes,1,sizeof(float), r, "p_r" ); op_decl_vec(&p_u, &nodes,1,sizeof(float), u, "p_u" ); op_decl_vec(&p_du,&nodes,1,sizeof(float), du, "p_du"); op_decl_gbl(&p_beta, 1,sizeof(float), &beta, "p_beta"); op_decl_gbl(&p_u_sum, 1,sizeof(float), &u_sum, "p_u_sum"); op_decl_gbl(&p_u_max, 1,sizeof(float), &u_max, "p_u_max"); alpha = 1.0f; op_decl_const(&alpha,1,sizeof(float)); op_diagnostic_output(); /* main iteration loop */ for (int iter=0; iter<NITER; iter++) { op_par_loop_4((void(*)(void*,void*,void*,void*))res,"res", &edges, op_construct_vec_arg(&p_A, OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_u, 1,&ppedge, OP_READ), op_construct_vec_arg(&p_du, 0,&ppedge, OP_INC), op_construct_gbl_arg(&p_beta,OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop_5((void(*)(void*,void*,void*,void*,void*))update,"update", &nodes, op_construct_vec_arg(&p_r, OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_du, OP_NONE, NULL, OP_RW), op_construct_vec_arg(&p_u, OP_NONE, NULL, OP_INC), op_construct_gbl_arg(&p_u_sum,OP_INC), op_construct_gbl_arg(&p_u_max,OP_MAX)); printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } /* print out results */ printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u); /* op_fetch_data(p_du); op_fetch_data(p_r); */ for (int pass=0; pass<1; pass++) { /* if(pass==0) printf("\narray u\n"); else if(pass==1) printf("\narray du\n"); else if(pass==2) printf("\narray r\n"); */ for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) printf(" %7.4f",u[i-1 + (j-1)*(NN-1)]); else if (pass==1) printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) printf(" %7.4f",r[i-1 + (j-1)*(NN-1)]); } printf("\n"); } printf("\n"); } op_timing_output(); op_exit(); }
int main(int argc,char *argv[]) { int *becell; int *ecell; int *bound; int *bedge; int *edge; int *cell; float *x; float *q; float *qold; float *adt; float *res; int nnode; int ncell; int nedge; int nbedge; int niter; float rms; if (argc != 2) { printf("Usage: airfoil <grid>\n"); exit(1); } // read in grid printf("reading in grid \n"); char *grid = argv[1]; FILE *fp; if ((fp = fopen(grid,"r")) == 0L) { printf("can\'t open file %s\n",grid); exit((-1)); } if (fscanf(fp,"%d %d %d %d \n",&nnode,&ncell,&nedge,&nbedge) != 4) { printf("error reading from %s\n",grid); exit((-1)); } cell = ((int *)(malloc(((4 * ncell) * (sizeof(int )))))); edge = ((int *)(malloc(((2 * nedge) * (sizeof(int )))))); ecell = ((int *)(malloc(((2 * nedge) * (sizeof(int )))))); bedge = ((int *)(malloc(((2 * nbedge) * (sizeof(int )))))); becell = ((int *)(malloc((nbedge * (sizeof(int )))))); bound = ((int *)(malloc((nbedge * (sizeof(int )))))); x = ((float *)(malloc(((2 * nnode) * (sizeof(float )))))); q = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); qold = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); res = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); adt = ((float *)(malloc((ncell * (sizeof(float )))))); for (int n = 0; n < nnode; n++) { if (fscanf(fp,"%f %f \n",(x + (2 * n)),(x + ((2 * n) + 1))) != 2) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",(cell + (4 * n)),(cell + ((4 * n) + 1)),(cell + ((4 * n) + 2)),(cell + ((4 * n) + 3))) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",(edge + (2 * n)),(edge + ((2 * n) + 1)),(ecell + (2 * n)),(ecell + ((2 * n) + 1))) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",(bedge + (2 * n)),(bedge + ((2 * n) + 1)),(becell + n),(bound + n)) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = (gam - 1.0f); cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = ((3.0f * atan(1.0f)) / 45.0f); float p = 1.0f; float r = 1.0f; float u = (sqrt(((gam * p) / r)) * mach); float e = ((p / (r * gm1)) + ((0.5f * u) * u)); qinf[0] = r; qinf[1] = (r * u); qinf[2] = 0.0f; qinf[3] = (r * e); for (int n = 0; n < ncell; n++) { for (int m = 0; m < 4; m++) { q[(4 * n) + m] = qinf[m]; res[(4 * n) + m] = 0.0f; } } // OP initialisation op_init(argc,argv,2); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode,"nodes"); op_set edges = op_decl_set(nedge,"edges"); op_set bedges = op_decl_set(nbedge,"bedges"); op_set cells = op_decl_set(ncell,"cells"); op_map pedge = op_decl_map(edges,nodes,2,edge,"pedge"); op_map pecell = op_decl_map(edges,cells,2,ecell,"pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge,"pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells,nodes,4,cell,"pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int",bound,"p_bound"); op_dat p_x = op_decl_dat(nodes,2,"float",x,"p_x"); op_dat p_q = op_decl_dat(cells,4,"float",q,"p_q"); op_dat p_qold = op_decl_dat(cells,4,"float",qold,"p_qold"); op_dat p_adt = op_decl_dat(cells,1,"float",adt,"p_adt"); op_dat p_res = op_decl_dat(cells,4,"float",res,"p_res"); op_decl_const(1,"float",&gam); op_decl_const(1,"float",&gm1); op_decl_const(1,"float",&cfl); op_decl_const(1,"float",&eps); op_decl_const(1,"float",&mach); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf); op_diagnostic_output(); // main time-marching loop niter = 1000; for (int iter = 1; iter <= niter; iter++) { // save old flow solution save_soln_host("save_soln_modified",cells,op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_WRITE)); // predictor/corrector update loop for (int k = 0; k < 2; k++) { // calculate area/timstep adt_calc_host("adt_calc_modified",cells,op_arg_dat(p_x,0,pcell,2,"float",OP_READ),op_arg_dat(p_x,1,pcell,2,"float",OP_READ),op_arg_dat(p_x,2,pcell,2,"float",OP_READ),op_arg_dat(p_x,3,pcell,2,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_WRITE)); // calculate flux residual res_calc_host("res_calc_modified",edges,op_arg_dat(p_x,0,pedge,2,"float",OP_READ),op_arg_dat(p_x,1,pedge,2,"float",OP_READ),op_arg_dat(p_q,0,pecell,4,"float",OP_READ),op_arg_dat(p_q,1,pecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pecell,1,"float",OP_READ),op_arg_dat(p_adt,1,pecell,1,"float",OP_READ),op_arg_dat(p_res,0,pecell,4,"float",OP_INC),op_arg_dat(p_res,1,pecell,4,"float",OP_INC)); bres_calc_host("bres_calc_modified",bedges,op_arg_dat(p_x,0,pbedge,2,"float",OP_READ),op_arg_dat(p_x,1,pbedge,2,"float",OP_READ),op_arg_dat(p_q,0,pbecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pbecell,1,"float",OP_READ),op_arg_dat(p_res,0,pbecell,4,"float",OP_INC),op_arg_dat(p_bound,(-1), OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; update_host("update_modified",cells,op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_WRITE),op_arg_dat(p_res,(-1), OP_ID,4,"float",OP_RW),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_READ),op_arg_gbl(&rms,1,"float",OP_INC)); } // print iteration history rms = (sqrt((rms / ((float )ncell)))); if ((iter % 100) == 0) printf(" %d %10.5e \n",iter,rms); } /* for (int ll = 0; ll < (4 * ncell); ll++) printf("%lf\n",q[ll]);*/ op_timing_output(); return 0; }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); //op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); //op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); //op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); // p_res, p_adt and p_qold now declared as a temp op_dats during // the execution of the time-marching loop op_decl_const2("gam",1,"double",&gam); op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); double g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { double* tmp_elem = NULL; op_dat p_res = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res"); op_dat p_adt = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt"); op_dat p_qold = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold"); // save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell ); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); if (iter%1000 == 0 && g_ncell == 720000){ //defailt mesh -- for validation testing //op_printf(" %d %3.16f \n",iter,rms); double diff=fabs((100.0*(rms/0.0001060114637578))-100.0); op_printf("\n\nTest problem with %d cells is within %3.15E %% of the expected solution\n",720000, diff); if(diff < 0.00001) { op_printf("This test is considered PASSED\n"); } else { op_printf("This test is considered FAILED\n"); } } if (op_free_dat_temp(p_res) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name); if (op_free_dat_temp(p_adt) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name); if (op_free_dat_temp(p_qold) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv){ int nnode, nedge, n, e; float dx; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); dx = 1.0f / ((float) NN); int *pp = (int *)malloc(sizeof(int)*2*nedge); int *p1 = (int *)malloc(sizeof(int)*nedge); int *p2 = (int *)malloc(sizeof(int)*nedge); float *xe = (float *)malloc(sizeof(float)*2*nedge); float *xn = (float *)malloc(sizeof(float)*2*nnode); double *A = (double *)malloc(sizeof(double)*3*nedge); float *r = (float *)malloc(sizeof(float)*2*nnode); float *u = (float *)malloc(sizeof(float)*2*nnode); float *du = (float *)malloc(sizeof(float)*3*nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[2*n] = 0.0f; u[2*n] = 0.0f; du[3*n] = 0.0f; xn[2*n ] = i*dx; xn[2*n+1] = j*dx; p1[e] = n; p2[e] = n; pp[2*e] = p1[e]; pp[2*e+1] = p2[e]; A[3*e] = -1.0f; xe[2*e ] = i*dx; xe[2*e+1] = j*dx; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[2*n] += 0.25f; } else { p1[e] = n; p2[e] = i2-1 + (j2-1)*(NN-1); pp[2*e] = p1[e]; pp[2*e+1] = p2[e]; A[3*e] = 0.25f; xe[2*e ] = i*dx; xe[2*e+1] = j*dx; e++; } } } } // OP initialisation op_init(argc,argv,5); // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,3,"double",A, "p_A" ); op_dat p_r = op_decl_dat(nodes,2,"float", r, "p_r" ); op_dat p_u = op_decl_dat(nodes,2,"float", u, "p_u" ); op_dat p_du = op_decl_dat(nodes,3,"float", du, "p_du"); alpha = 2.0f; op_decl_const(1,"float",&alpha); alpha = 1.0f; op_decl_const(1,"float",&alpha); op_diagnostic_output(); // main iteration loop float u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop(res,"res", edges, op_arg_dat(p_A, -1,OP_ID, 3,"double",OP_READ), op_arg_dat(p_u, 1,ppedge, 2,"float", OP_READ), op_arg_dat(p_du, 0,ppedge, 3,"float", OP_INC ), op_arg_gbl(&beta,1,"float",OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update,"update", nodes, op_arg_dat(p_r, -1,OP_ID, 2,"float",OP_READ), op_arg_dat(p_du, -1,OP_ID, 3,"float",OP_RW ), op_arg_dat(p_u, -1,OP_ID, 2,"float",OP_INC ), op_arg_gbl(&u_sum,1,"float",OP_INC), op_arg_gbl(&u_max,1,"float",OP_MAX)); printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } // print out results printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u); /* op_fetch_data(p_du); op_fetch_data(p_r); */ for (int pass=0; pass<1; pass++) { /* if(pass==0) printf("\narray u\n"); else if(pass==1) printf("\narray du\n"); else if(pass==2) printf("\narray r\n"); */ for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) printf(" %7.4f",u[2*(i-1 + (j-1)*(NN-1))]); else if (pass==1) printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) printf(" %7.4f",r[2*(i-1 + (j-1)*(NN-1))]); } printf("\n"); } printf("\n"); } op_timing_output(); op_exit(); // free allocated arrays free(pp); free(A); free(r); free(u); free(du); }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; // read in grid printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("/work/rr908/airfoil/new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); #ifdef DIAGNOSTIC print_array((float *) x, nnode, "initial_nodes"); print_array((float *) cell, ncell, "initial_cells"); FILE *flog; flog = fopen( "initial_cells_cellarray", "w" ); for( int i=0; i< ncell; ++i ) { fprintf( flog, "%d %d %d %d\n", cell[4*i], cell[4*i+1], cell[4*i+2], cell[4*i+3] ); } fclose( flog ); print_array((float *) edge, nedge, "initial_edges"); print_array((float *) ecell, nedge, "initiall_edges_for_cell"); print_array((float *) bedge, nbedge, "initial_border_edges"); print_array((float *) becell, nbedge, "initial_becell"); print_array((float *) bound, nbedge, "initial bound"); #endif // set constants and initialise flow field and residual printf("initialising flow field \n"); g_const.gam = 1.4f; g_const.gm1 = g_const.gam - 1.0f; g_const.cfl = 0.9f; g_const.eps = 0.05f; g_const.mach = 0.4f; g_const.alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(g_const.gam*p/r)*g_const.mach; float e = p/(r*g_const.gm1) + 0.5f*u*u; g_const.qinf[0] = r; g_const.qinf[1] = r*u; g_const.qinf[2] = 0.0f; g_const.qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = g_const.qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation printf("OP initialisation\n"); op_init(argc,argv,2); g_const_d = op_allocate_constant( &g_const, sizeof( struct global_constants ) ); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const2("gam",1,"float",&g_const.gam ); op_decl_const2("gm1",1,"float",&g_const.gm1 ); op_decl_const2("cfl",1,"float",&g_const.cfl ); op_decl_const2("eps",1,"float",&g_const.eps ); op_decl_const2("mach",1,"float",&g_const.mach ); op_decl_const2("alpha",1,"float",&g_const.alpha); op_decl_const2("qinf",4,"float",g_const.qinf ); op_diagnostic_output(); #ifdef DIAGNOSTIC dump_array(p_bound, "initial_dat_p_bound"); dump_array(p_x, "initial_dat_p_x"); dump_array(p_q, "initiall_dat_p_q"); dump_array(p_qold, "initial_dat_p_qold"); dump_array(p_adt, "initial_dat_p_adt"); dump_array(p_res, "initial_dat_res"); #endif // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution // dump_array(p_q, "p_q_iter_before"); // dump_array(p_qold, "p_q_old_iter_before"); op_par_loop_save_soln("save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE)); // dump_array(p_q, "p_q_iter_after"); // dump_array(p_qold, "p_q_old_iter_after"); /* if ( iter == 1 ) { dump_array( p_qold, "p_qold" ); } */ #ifdef DIAGNOSTIC if (iter==1) { dump_array( p_qold, "p_qold" ); } #endif //dump_array( p_qold, "p_qold" ); //op_fetch_data( p_qold ); //print_array( ( float *) p_qold->data, 4*p_qold->set->size, "p_qold" ); // print_array( p_q, "p_qold2" ); // print_array( p_qold, "p_qold" ); //assert( p_q->data[0] != 0.0f ); // predictor/corrector update loop // dump_array(p_adt, "p_adt_before"); for(int k=0; k<2; k++) { // calculate area/timstep if(k == 0 && iter == 0) { printf("Dumping adt before adt_calc execution array"); op_fetch_data( p_adt ); float* array = (float *) p_adt->data; long size = p_adt->set->size; for(long elem = 0; elem < size; ++elem) { printf("%lf",array[elem]); } } op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE)); if(k == 0 && iter == 0) { printf("Dumping adt after 1x adt_calc execution array"); op_fetch_data( p_adt ); float* array = (float *) p_adt->data; long size = p_adt->set->size; for(long elem = 0; elem < size; ++elem) { printf("%lf",array[elem]); } } #ifdef DIAGNOSTIC if (iter==1 && k==0) { dump_array( p_adt, "p_adt0" ); } if (iter==1 && k==1) { dump_array( p_adt, "p_adt1" ); } #endif // dump_array(p_adt, "p_adt_after"); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pecell,4,"float",OP_READ), op_arg_dat(p_q, 1,pecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"float",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pecell,4,"float",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"float",OP_INC )); #ifdef DIAGNOSTIC if (iter==1 && k==0) { dump_array( p_res, "p_res0" ); } if (iter==1 && k==1) { dump_array( p_res, "p_res1" ); } #endif op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"float",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); #ifdef DIAGNOSTIC if (iter==1 && k==0) { dump_array( p_res, "p_res_a0" ); } if (iter==1 && k==0) { dump_array( p_res, "p_res_a1" ); } #endif // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ), op_arg_gbl(&rms,1,"float",OP_INC)); } #ifdef DIAGNOSTIC if (iter==1) { dump_array( p_q, "p_q1" ); } #endif // print iteration history rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); #ifdef DIAGNOSTIC dump_array( p_q, "p_q" ); #endif }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,5); int nnode, nedge, n, e; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); int *pp = (int *)malloc(sizeof(int)*2*nedge); float *A = (float *)malloc(sizeof(float)*nedge); float *r = (float *)malloc(sizeof(float)*nnode); float *u = (float *)malloc(sizeof(float)*nnode); float *du = (float *)malloc(sizeof(float)*nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[n] = 0.0f; u[n] = 0.0f; du[n] = 0.0f; pp[2*e] = n; pp[2*e+1] = n; A[e] = -1.0f; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[n] += 0.25f; } else { pp[2*e] = n; pp[2*e+1] = i2-1 + (j2-1)*(NN-1); A[e] = 0.25f; e++; } } } } // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,1,"float",A, "p_A" ); op_dat p_r = op_decl_dat(nodes,1,"float",r, "p_r" ); op_dat p_u = op_decl_dat(nodes,1,"float",u, "p_u" ); op_dat p_du = op_decl_dat(nodes,1,"float",du, "p_du"); alpha = 1.0f; op_decl_const2("alpha",1,"float",&alpha); op_diagnostic_output(); // main iteration loop float u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop_res("res",edges, op_arg_dat(p_A,-1,OP_ID,1,"float",OP_READ), op_arg_dat(p_u,1,ppedge,1,"float",OP_READ), op_arg_dat(p_du,0,ppedge,1,"float",OP_INC), op_arg_gbl(&beta,1,"float",OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop_update("update",nodes, op_arg_dat(p_r,-1,OP_ID,1,"float",OP_READ), op_arg_dat(p_du,-1,OP_ID,1,"float",OP_RW), op_arg_dat(p_u,-1,OP_ID,1,"float",OP_INC), op_arg_gbl(&u_sum,1,"float",OP_INC), op_arg_gbl(&u_max,1,"float",OP_MAX)); op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } // print out results op_printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u, u); for (int pass=0; pass<1; pass++) { for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) op_printf(" %7.4f",u[i-1 + (j-1)*(NN-1)]); else if (pass==1) op_printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) op_printf(" %7.4f",r[i-1 + (j-1)*(NN-1)]); } op_printf("\n"); } op_printf("\n"); } op_timing_output(); int result = check_result<float>(u, NN, TOLERANCE); op_exit(); free(pp); free(A); free(u); free(du); free(r); return result; }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; op_set nodes, edges, bedges, cells; op_map pedge, pecell, pbedge, pbecell, pcell; op_dat p_x, p_q, p_qold, p_res, p_adt, p_bound, p_rms; /* read in grid */ printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); /* set constants and initialise flow field and residual */ printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } /* OP initialisation */ op_init(argc,argv,2); /* declare sets, pointers, datasets and global constants */ op_decl_set(&nodes, nnode, "nodes"); op_decl_set(&edges, nedge, "edges"); op_decl_set(&bedges,nbedge, "bedges"); op_decl_set(&cells, ncell, "cells"); op_decl_map(&pedge, &edges, &nodes,2,edge, "pedge"); op_decl_map(&pecell, &edges, &cells,2,ecell, "pecell"); op_decl_map(&pbedge, &bedges,&nodes,2,bedge, "pbedge"); op_decl_map(&pbecell,&bedges,&cells,1,becell,"pbecell"); op_decl_map(&pcell, &cells, &nodes,4,cell, "pcell"); op_decl_vec(&p_bound,&bedges,1,sizeof(int),bound,"p_bound"); op_decl_vec(&p_x ,&nodes ,2,sizeof(float),x ,"p_x"); op_decl_vec(&p_q ,&cells ,4,sizeof(float),q ,"p_q"); op_decl_vec(&p_qold ,&cells ,4,sizeof(float),qold ,"p_qold"); op_decl_vec(&p_adt ,&cells ,1,sizeof(float),adt ,"p_adt"); op_decl_vec(&p_res ,&cells ,4,sizeof(float),res ,"p_res"); op_decl_gbl(&p_rms ,1,sizeof(float),&rms ,"p_rms"); op_decl_const(&gam, 1, sizeof(float)); op_decl_const(&gm1, 1, sizeof(float)); op_decl_const(&cfl, 1, sizeof(float)); op_decl_const(&eps, 1, sizeof(float)); op_decl_const(&mach, 1, sizeof(float)); op_decl_const(&alpha,1, sizeof(float)); op_decl_const(qinf, 4, sizeof(float)); op_diagnostic_output(); /* main time-marching loop */ niter = 1000; for(int iter=1; iter<=niter; iter++) { /* save old flow solution */ op_par_loop_2((void(*)(void*,void*))save_soln,"save_soln", &cells, op_construct_vec_arg(&p_q, OP_NONE, NULL,OP_READ ), op_construct_vec_arg(&p_qold,OP_NONE, NULL,OP_WRITE)); /* predictor/corrector update loop */ for(int k=0; k<2; k++) { /* calculate area/timstep */ op_par_loop_3((void(*)(void*,void*,void*))adt_calc,"adt_calc",&cells, op_construct_vec_arg(&p_x, OP_ALL, &pcell, OP_READ ), op_construct_vec_arg(&p_q, OP_NONE, NULL, OP_READ ), op_construct_vec_arg(&p_adt, OP_NONE, NULL, OP_WRITE)); /* calculate flux residual */ op_par_loop_4((void(*)(void*,void*,void*,void*))res_calc,"res_calc",&edges, op_construct_vec_arg(&p_x, OP_ALL,&pedge, OP_READ), op_construct_vec_arg(&p_q, OP_ALL,&pecell,OP_READ), op_construct_vec_arg(&p_adt, OP_ALL,&pecell,OP_READ), op_construct_vec_arg(&p_res, OP_ALL,&pecell,OP_INC)); op_par_loop_5((void(*)(void*,void*,void*,void*,void*))bres_calc,"bres_calc",&bedges, op_construct_vec_arg(&p_x, OP_ALL,&pbedge, OP_READ), op_construct_vec_arg(&p_q, 0,&pbecell,OP_READ), op_construct_vec_arg(&p_adt, 0,&pbecell,OP_READ), op_construct_vec_arg(&p_res, 0,&pbecell,OP_INC), op_construct_vec_arg(&p_bound,OP_NONE, NULL,OP_READ)); /* update flow field */ rms = 0.0; op_par_loop_5((void(*)(void*,void*,void*,void*,void*))update,"update",&cells, op_construct_vec_arg(&p_qold,OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_q, OP_NONE, NULL, OP_WRITE), op_construct_vec_arg(&p_res, OP_NONE, NULL, OP_RW), op_construct_vec_arg(&p_adt, OP_NONE, NULL, OP_READ), op_construct_gbl_arg(&p_rms, OP_INC)); } /* print iteration history */ rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); }
int main(int argc, char *argv[]){ int *becell, *ecell, *bound, *bedge, *edge, *cell; REAL *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; REAL rms; if (argc != 2) { printf("Usage: airfoil <grid>\n"); exit(1); } // read in grid printf("reading in grid \n"); char* grid = argv[1]; FILE *fp; if ( (fp = fopen(grid,"r")) == NULL) { printf("can't open file %s\n", grid); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from %s\n", grid); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (REAL *) malloc(2*nnode*sizeof(REAL)); q = (REAL *) malloc(4*ncell*sizeof(REAL)); qold = (REAL *) malloc(4*ncell*sizeof(REAL)); res = (REAL *) malloc(4*ncell*sizeof(REAL)); adt = (REAL *) malloc( ncell*sizeof(REAL)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; REAL mach = 0.4f; REAL alpha = 3.0f*atan(1.0f)/45.0f; REAL p = 1.0f; REAL r = 1.0f; REAL u = sqrt(gam*p/r)*mach; REAL e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation op_init(argc,argv,2); op_tuner* global_tuner = op_create_global_tuner(); global_tuner->op_warpsize = 1; global_tuner->block_size = 64; global_tuner->part_size = 128; global_tuner->cache_line_size = 128; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,REAL_STRING,x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,REAL_STRING,q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,REAL_STRING,qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,REAL_STRING,adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,REAL_STRING,res ,"p_res"); op_decl_const(1,REAL_STRING,&gam ); op_decl_const(1,REAL_STRING,&gm1 ); op_decl_const(1,REAL_STRING,&cfl ); op_decl_const(1,REAL_STRING,&eps ); op_decl_const(1,REAL_STRING,&mach ); op_decl_const(1,REAL_STRING,&alpha); op_decl_const(4,REAL_STRING,qinf ); op_tuner* save_soln_tuner = op_create_tuner("save_soln"); save_soln_tuner->part_size = 64; save_soln_tuner->block_size = 4; op_tuner* adt_calc_tuner = op_create_tuner("adt_calc"); adt_calc_tuner->part_size = 64; adt_calc_tuner->block_size = 4; op_tuner* res_calc_tuner = op_create_tuner("res_calc"); res_calc_tuner->part_size = 64; res_calc_tuner->block_size = 4; op_tuner* bres_calc_tuner = op_create_tuner("bres_calc"); bres_calc_tuner->part_size = 64; bres_calc_tuner->block_size = 4; op_tuner* update_tuner = op_create_tuner("update"); update_tuner->part_size = 64; update_tuner->block_size = 4; op_diagnostic_output(); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_WRITE), save_soln_tuner); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 1,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 2,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 3,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,REAL_STRING,OP_WRITE), adt_calc_tuner); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_x, 1,pedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_q, 0,pecell,4,REAL_STRING,OP_READ), op_arg_dat(p_q, 1,pecell,4,REAL_STRING,OP_READ), op_arg_dat(p_adt, 0,pecell,1,REAL_STRING,OP_READ), op_arg_dat(p_adt, 1,pecell,1,REAL_STRING,OP_READ), op_arg_dat(p_res, 0,pecell,4,REAL_STRING,OP_INC ), op_arg_dat(p_res, 1,pecell,4,REAL_STRING,OP_INC ), res_calc_tuner); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_x, 1,pbedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_q, 0,pbecell,4,REAL_STRING,OP_READ), op_arg_dat(p_adt, 0,pbecell,1,REAL_STRING,OP_READ), op_arg_dat(p_res, 0,pbecell,4,REAL_STRING,OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), bres_calc_tuner); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,REAL_STRING,OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,REAL_STRING,OP_READ ), op_arg_gbl(&rms,1,REAL_STRING,OP_INC), update_tuner); } // print iteration history rms = sqrt(rms/(REAL) ncell); if ( iter % 100 == 0 ) printf(" %d %10.5e \n",iter,rms); } for ( int ll = 0; ll < 4*ncell; ll++ ) { printf ( "%lf\n", q[ll] ); } op_timing_output(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge; /**------------------------BEGIN I/O -------------------**/ char file[] = "new_grid.dat"; char file_out[] = "new_grid_out.h5"; /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen(file,"r")) == NULL) { op_printf("can't open file %s\n",file); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (float *) malloc(2*g_nnode*sizeof(float)); g_q = (float *) malloc(4*g_ncell*sizeof(float)); g_qold = (float *) malloc(4*g_ncell*sizeof(float)); g_res = (float *) malloc(4*g_ncell*sizeof(float)); g_adt = (float *) malloc( g_ncell*sizeof(float)); for (int n=0; n<g_nnode; n++){ check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2); } for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1); scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2); scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } /**------------------------END I/O -----------------------**/ /* FIXME: It's not clear to the compiler that sth. is going on behind the scenes here. Hence theses variables are reported as unused */ op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_dump_to_hdf5(file_out); op_write_const_hdf5("gam", 1,"float",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1", 1,"float",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl", 1,"float",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps", 1,"float",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach", 1,"float",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"float",(char *)&alpha,"new_grid_out.h5"); op_write_const_hdf5("qinf", 4,"float",(char *)qinf, "new_grid_out.h5"); //create halos - for sanity check op_halo_create(); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double) op_get_size(cells)); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //output the result dat array to files op_print_dat_to_txtfile(p_q, "out_grid_seq.dat"); //ASCI op_print_dat_to_binfile(p_q, "out_grid_seq.bin"); //Binary op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in airfoil grid op_printf("reading in data \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // declare sets, pointers, datasets op_set edges = op_decl_set(nedge, "edges"); op_set cells = op_decl_set(ncell, "cells"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); int count; op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); //indirect reduction count = 0; op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_gbl(&count,1,"int",OP_INC)); op_printf("number of edges:: %d should be: %d \n",count,nedge); if (count != nedge) op_printf("indirect reduction FAILED\n"); else op_printf("indirect reduction PASSED\n"); //direct reduction count = 0; op_par_loop_update("update",cells, op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_gbl(&count,1,"int",OP_INC)); op_printf("number of cells: %d should be: %d \n",count,ncell); if (count != ncell) op_printf("direct reduction FAILED\n"); else op_printf("direct reduction PASSED\n"); op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; // read in grid printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation printf("OP init\n"); op_init(argc,argv,7); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_tuner *OP_tuner; op_diagnostic_output(); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE), NULL); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE), NULL); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pecell,4,"float",OP_READ), op_arg_dat(p_q, 1,pecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"float",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pecell,4,"float",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"float",OP_INC ), NULL); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"float",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), NULL); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ), op_arg_gbl(&rms,1,"float",OP_INC), NULL); } // print iteration history rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *pp; double *A, *r, *u, *du; int nnode, nedge; /**------------------------BEGIN I/O and PARTITIONING ---------------------**/ int g_nnode, g_nedge, g_n, g_e; g_nnode = (NN-1)*(NN-1); g_nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); int *g_pp = 0; double *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0; op_printf("Global number of nodes, edges = %d, %d\n",g_nnode,g_nedge); if(my_rank == MPI_ROOT) { g_pp = (int *)malloc(sizeof(int)*2*g_nedge); g_A = (double *)malloc(sizeof(double)*g_nedge); g_r = (double *)malloc(sizeof(double)*g_nnode); g_u = (double *)malloc(sizeof(double)*g_nnode); g_du = (double *)malloc(sizeof(double)*g_nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning g_e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { g_n = i-1 + (j-1)*(NN-1); g_r[g_n] = 0.0f; g_u[g_n] = 0.0f; g_du[g_n] = 0.0f; g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = g_n; g_A[g_e] = -1.0f; g_e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { g_r[g_n] += 0.25f; } else { g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = i2-1 + (j2-1)*(NN-1); g_A[g_e] = 0.25f; g_e++; } } } } } /* Compute local sizes */ nnode = compute_local_size (g_nnode, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); op_printf("Number of nodes, edges on process %d = %d, %d\n" ,my_rank,nnode,nedge); /*Allocate memory to hold local sets, mapping tables and data*/ pp = (int *)malloc(2*sizeof(int)*nedge); A = (double *) malloc(nedge*sizeof(double)); r = (double *) malloc(nnode*sizeof(double)); u = (double *) malloc(nnode*sizeof(double)); du = (double *) malloc(nnode*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_pp, pp, comm_size, g_nedge,nedge, 2); scatter_double_array(g_A, A, comm_size, g_nedge,nedge, 1); scatter_double_array(g_r, r, comm_size, g_nnode,nnode, 1); scatter_double_array(g_u, u, comm_size, g_nnode,nnode, 1); scatter_double_array(g_du, du, comm_size, g_nnode,nnode, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_pp); free(g_A); free(g_r); free(g_u); free(g_du); } /**------------------------END I/O and PARTITIONING ---------------------**/ // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode,"nodes"); op_set edges = op_decl_set(nedge,"edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,1,"double", A, "p_A" ); op_dat p_r = op_decl_dat(nodes,1,"double", r, "p_r" ); op_dat p_u = op_decl_dat(nodes,1,"double", u, "p_u" ); op_dat p_du = op_decl_dat(nodes,1,"double", du,"p_du"); alpha = 1.0f; op_decl_const(1,"double",&alpha); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main iteration loop double u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop(res,"res", edges, op_arg_dat(p_A, -1,OP_ID, 1,"double", OP_READ), op_arg_dat(p_u, 1,ppedge, 1,"double", OP_READ), op_arg_dat(p_du, 0,ppedge, 1,"double", OP_INC), op_arg_gbl(&beta, 1,"double", OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update,"update", nodes, op_arg_dat(p_r, -1,OP_ID, 1,"double",OP_READ), op_arg_dat(p_du, -1,OP_ID, 1,"double",OP_RW), op_arg_dat(p_u, -1,OP_ID, 1,"double",OP_INC), op_arg_gbl(&u_sum,1,"double",OP_INC), op_arg_gbl(&u_max,1,"double",OP_MAX)); op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/g_nnode)); } op_timers(&cpu_t2, &wall_t2); //get results data array op_dat temp = op_mpi_get_data(p_u); //output the result dat array to files print_dat_tofile(temp, "out_grid.dat"); //ASCI //print_dat_tobinfile(temp, "out_grid.bin"); //Binary //print each mpi process's timing info for each kernel op_timing_output(); //print total time for niter interations op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { int nnode = (NN+1); int *p_elem_node = (int *)malloc(2*sizeof(int)*NN); Real *p_xn = (Real *)malloc(sizeof(Real)*nnode); Real *p_x = (Real *)malloc(sizeof(Real)*nnode); Real *p_xref = (Real *)malloc(sizeof(Real)*nnode); Real *p_y = (Real *)malloc(sizeof(Real)*nnode); // create element -> node mapping for (int i = 0; i < NN; ++i) { p_elem_node[2*i] = i; p_elem_node[2*i+1] = i+1; } // create coordinates and populate x with -1/pi^2*sin(pi*x) for (int i = 0; i < nnode; ++i) { /*p_xn[i] = sin(0.5*M_PI*i/NN);*/ p_xn[i] = (Real)i/NN; p_x[i] = (1./(M_PI*M_PI))*sin(M_PI*p_xn[i]); p_xref[i] = sin(M_PI*p_xn[i]); } // OP initialisation op_init(argc,argv,2); // declare sets, pointers, and datasets op_set nodes, elements; op_map elem_node; op_dat x, y, xn, mat; op_sparsity mat_sparsity; op_decl_set(&nodes, nnode, "nodes"); op_decl_set(&elements, NN, "elements"); op_decl_map(&elem_node, &elements, &nodes, 2, p_elem_node, "elem_node"); /*dump_map(&elem_node, "map");*/ op_decl_vec(&x, &nodes, 1, sizeof(Real), p_x, "x"); op_decl_vec(&y, &nodes, 1, sizeof(Real), p_y, "y"); op_decl_vec(&xn, &nodes, 1, sizeof(Real), p_xn, "xn"); op_decl_sparsity(&mat_sparsity, &elem_node, &elem_node); /*dump_sparsity(&mat_sparsity, "sparsity");*/ op_decl_mat(&mat, &nodes, &nodes, 1, sizeof(Real), &mat_sparsity, "matrix"); /*dump_dat(&mat, "matrix");*/ op_diagnostic_output(); // Fix the values of the boundary nodes to get a unique solution Real val = 1e308; int idx = 0; op_mat_addto(&mat, &val, 1, &idx, 1, &idx); idx = NN; op_mat_addto(&mat, &val, 1, &idx, 1, &idx); // construct the matrix op_par_loop_2((void(*)(void*,void*))laplace, "laplace", &elements, op_construct_mat_arg(&mat, OP_ALL, &elem_node, OP_ALL, &elem_node, OP_INC), op_construct_vec_arg(&xn, 0, &elem_node, OP_READ)); // spmv /*op_mat_mult(&mat, &x, &y);*/ // solve op_solve(&mat, &x, &y); for (int i = 0; i < nnode; ++i) { printf("%f\n", p_x[i]); } op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (double *) malloc(2*g_nnode*sizeof(double)); g_q = (double *) malloc(4*g_ncell*sizeof(double)); g_qold = (double *) malloc(4*g_ncell*sizeof(double)); g_res = (double *) malloc(4*g_ncell*sizeof(double)); g_adt = (double *) malloc( g_ncell*sizeof(double)); for (int n=0; n<g_nnode; n++){ check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2); } for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode,nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_double_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2-wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); //op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); //op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); //op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); // p_res, p_adt and p_qold now declared as a temp op_dats during // the execution of the time-marching loop op_decl_const2("gam",1,"double",&gam ); op_decl_const2("gm1",1,"double",&gm1 ); op_decl_const2("cfl",1,"double",&cfl ); op_decl_const2("eps",1,"double",&eps ); op_decl_const2("mach",1,"double",&mach ); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf ); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", cells, pecell, p_x); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); niter = 1000; for(int iter=1; iter<=niter; iter++) { double* tmp_elem = NULL; op_dat p_res = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res"); op_dat p_adt = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt"); op_dat p_qold = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold"); //save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } //print iteration history rms = sqrt(rms/(double) g_ncell); if (iter%100 == 0) op_printf("%d %10.5e \n",iter,rms); if (op_free_dat_temp(p_res) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name); if (op_free_dat_temp(p_adt) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name); if (op_free_dat_temp(p_qold) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); //print total time for niter interations op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *bnode, *cell, *g_bnode, *g_cell; double *xm, *g_xm;; int nnode,ncell,nbnodes,niter, g_nnode, g_ncell, g_nbnodes; double rms = 1; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("FE_grid.dat","r")) == NULL) { op_printf("can't open file FE_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d \n",&g_nnode, &g_ncell, &g_nbnodes) != 3) { op_printf("error reading from new_grid.dat\n"); exit(-1); } if (my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_bnode = (int *) malloc(g_nbnodes*sizeof(int)); g_xm = (double *) malloc(2*g_nnode*sizeof(double)); for (int n=0; n<g_nnode; n++) { if (fscanf(fp,"%lf %lf \n",&g_xm[2*n], &g_xm[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_nbnodes; n++) { if (fscanf(fp,"%d \n",&g_bnode[n]) != 1) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nbnodes = compute_local_size (g_nbnodes, comm_size, my_rank); cell = (int *) malloc(4*ncell*sizeof(int)); bnode = (int *) malloc(nbnodes*sizeof(int)); xm = (double *) malloc(2*nnode*sizeof(double)); scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes,nbnodes, 1); scatter_double_array(g_xm, xm, comm_size, g_nnode,nnode, 2); if(my_rank == MPI_ROOT) { free(g_cell); free(g_xm); free(g_bnode); } // set constants and initialise flow field and residual op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0/gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = -0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf*minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; double *phim = (double *)malloc(nnode*sizeof(double)); memset(phim,0,nnode*sizeof(double)); for (int i = 0;i<nnode;i++) { phim[i] = minf*xm[2*i]; } double *K = (double *)malloc(4*4*ncell*sizeof(double)); memset(K,0,4*4*ncell*sizeof(double)); double *resm = (double *)malloc(nnode*sizeof(double)); memset(resm,0,nnode*sizeof(double)); double *V = (double *)malloc(nnode*sizeof(double)); memset(V,0,nnode*sizeof(double)); double *P = (double *)malloc(nnode*sizeof(double)); memset(P,0,nnode*sizeof(double)); double *U = (double *)malloc(nnode*sizeof(double)); memset(U,0,nnode*sizeof(double)); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set bnodes = op_decl_set(nbnodes, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pbnodes = op_decl_map(bnodes,nodes,1,bnode, "pbedge"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_xm = op_decl_dat(nodes ,2,"double",xm ,"p_x"); op_dat p_phim = op_decl_dat(nodes, 1, "double", phim, "p_phim"); op_dat p_resm = op_decl_dat(nodes, 1, "double", resm, "p_resm"); op_dat p_K = op_decl_dat(cells, 16, "double:soa", K, "p_K"); op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V"); op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P"); op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&gm1i ); op_decl_const(1,"double",&m2 ); op_decl_const(2,"double",wtg1 ); op_decl_const(2,"double",xi1 ); op_decl_const(4,"double",Ng1 ); op_decl_const(4,"double",Ng1_xi ); op_decl_const(4,"double",wtg2 ); op_decl_const(16,"double",Ng2 ); op_decl_const(32,"double",Ng2_xi ); op_decl_const(1,"double",&minf ); op_decl_const(1,"double",&freq ); op_decl_const(1,"double",&kappa ); op_decl_const(1,"double",&nmode ); op_decl_const(1,"double",&mfan ); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL); // main time-marching loop niter = 20; //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); for(int iter=1; iter<=niter; iter++) { op_par_loop(res_calc,"res_calc",cells, op_arg_dat(p_xm, -4, pcell, 2,"double",OP_READ), op_arg_dat(p_phim, -4, pcell, 1,"double",OP_READ), op_arg_dat(p_K, -1, OP_ID, 16,"double:soa",OP_WRITE), op_arg_dat(p_resm, -4, pcell, 1,"double",OP_INC) ); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_resm, 0, pbnodes, 1,"double",OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; //c1 = R'*R; op_par_loop(init_cg, "init_cg", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c1, 1, "double", OP_INC), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE)); //set up stopping conditions double res0 = sqrt(c1); double res = res0; int inner_iter = 0; int maxiter = 200; while (res > 0.1*res0 && inner_iter < maxiter) { //V = Stiffness*P op_par_loop(spMV, "spMV", cells, op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC), op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ), op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ)); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_V, 0, pbnodes, 1,"double",OP_WRITE)); c2 = 0; //c2 = P'*V; op_par_loop(dotPV, "dotPV", nodes, op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c2, 1, "double", OP_INC)); alpha = c1/c2; //U = U + alpha*P; //resm = resm-alpha*V; op_par_loop(updateUR, "updateUR", nodes, op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&alpha, 1, "double", OP_READ)); c3 = 0; //c3 = resm'*resm; op_par_loop(dotR, "dotR", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c3, 1, "double", OP_INC)); beta = c3/c1; //P = beta*P+resm; op_par_loop(updateP, "updateP", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&beta, 1, "double", OP_READ)); c1 = c3; res = sqrt(c1); inner_iter++; } rms = 0; //phim = phim - Stiffness\Load; op_par_loop(update, "update", nodes, op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&rms, 1, "double", OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(g_nnode), inner_iter); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); /*free(cell); free(bnode); free(xm); free(phim); free(K); free(resm); free(V); free(P); free(U);*/ }