static int test_op_init(){ struct op oper; op_init(&oper, Cloneable_clone_func,NULL); if (op_run(&oper) != NULL){ fprintf(stderr,"test_op_init failing\n"); return -1; } return 0; }
int main(int argc, char** argv) { socksserver srv; static const char defaultip[] = "127.0.0.1"; op_state opt_storage, *opt = &opt_storage; op_init(opt, argc, argv); SPDECLAREC(o_port, op_get(opt, SPL("port"))); SPDECLAREC(o_listenip, op_get(opt, SPL("listenip"))); int log; if(CONFIG_LOG) { SPDECLAREC(o_log, op_get(opt, SPL("log"))); log = o_log->size ? strtoint(o_log->ptr, o_log->size) : 1; } else log = 0; int uid, gid; if(CONFIG_IDSWITCH) { SPDECLAREC(o_uid, op_get(opt, SPL("uid"))); SPDECLAREC(o_gid, op_get(opt, SPL("gid"))); uid = o_uid->size ? strtoint(o_uid->ptr, o_uid->size) : -1; gid = o_gid->size ? strtoint(o_gid->ptr, o_gid->size) : -1; } else { uid = -1; gid = -1; } SPDECLAREC(o_user, op_get(opt, SPL("user"))); SPDECLAREC(o_pass, op_get(opt, SPL("pass"))); char* ip = o_listenip->size ? o_listenip->ptr : (char*) defaultip; int port = o_port->size ? strtoint(o_port->ptr, o_port->size) : 1080; if(CONFIG_LOG && op_hasflag(opt, SPLITERAL("-help"))) syntax(opt); if((o_user->size && (!o_pass->size || o_user->size > 255)) || (o_pass->size && (!o_user->size || o_pass->size > 255))) { LOGPUTS(1, SPL("fatal: username or password exceeding 255 chars, or only one of both set\n")); return 1; } if(CONFIG_DAEMONIZE && op_hasflag(opt, SPL("d"))) daemonize(); socksserver_init(&srv, ip, port, log, o_user, o_pass, uid, gid); return 0; }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p 
= 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); //op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); //op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); //op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); // p_res, p_adt and p_qold now declared as a temp op_dats during // the execution of the time-marching loop op_decl_const2("gam",1,"double",&gam); op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); double g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { double* tmp_elem = NULL; op_dat p_res = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res"); op_dat p_adt = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt"); op_dat p_qold = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold"); // save old 
flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell ); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); if (iter%1000 == 0 && g_ncell == 720000){ //defailt mesh -- for validation testing //op_printf(" %d %3.16f \n",iter,rms); double diff=fabs((100.0*(rms/0.0001060114637578))-100.0); op_printf("\n\nTest problem with %d cells is within %3.15E %% of the expected solution\n",720000, diff); if(diff < 
0.00001) { op_printf("This test is considered PASSED\n"); } else { op_printf("This test is considered FAILED\n"); } } if (op_free_dat_temp(p_res) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name); if (op_free_dat_temp(p_adt) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name); if (op_free_dat_temp(p_qold) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in airfoil grid op_printf("reading in data \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // declare sets, pointers, datasets op_set edges = op_decl_set(nedge, "edges"); op_set cells = op_decl_set(ncell, "cells"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_dat p_res = op_decl_dat(cells 
,4,"double",res ,"p_res"); int count; op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); //indirect reduction count = 0; op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_gbl(&count,1,"int",OP_INC)); op_printf("number of edges:: %d should be: %d \n",count,nedge); if (count != nedge) op_printf("indirect reduction FAILED\n"); else op_printf("indirect reduction PASSED\n"); //direct reduction count = 0; op_par_loop_update("update",cells, op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_gbl(&count,1,"int",OP_INC)); op_printf("number of cells: %d should be: %d \n",count,ncell); if (count != ncell) op_printf("direct reduction FAILED\n"); else op_printf("direct reduction PASSED\n"); op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,5); int nnode, nedge, n, e; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); int *pp = (int *)malloc(sizeof(int)*2*nedge); float *A = (float *)malloc(sizeof(float)*nedge); float *r = (float *)malloc(sizeof(float)*nnode); float *u = (float *)malloc(sizeof(float)*nnode); float *du = (float *)malloc(sizeof(float)*nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[n] = 0.0f; u[n] = 0.0f; du[n] = 0.0f; pp[2*e] = n; pp[2*e+1] = n; A[e] = -1.0f; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[n] += 0.25f; } else { pp[2*e] = n; pp[2*e+1] = i2-1 + (j2-1)*(NN-1); A[e] = 0.25f; e++; } } } } // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,1,"float",A, "p_A" ); op_dat p_r = op_decl_dat(nodes,1,"float",r, "p_r" ); op_dat p_u = op_decl_dat(nodes,1,"float",u, "p_u" ); op_dat p_du = op_decl_dat(nodes,1,"float",du, "p_du"); alpha = 1.0f; op_decl_const2("alpha",1,"float",&alpha); op_diagnostic_output(); // main iteration loop float u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop_res("res",edges, op_arg_dat(p_A,-1,OP_ID,1,"float",OP_READ), op_arg_dat(p_u,1,ppedge,1,"float",OP_READ), op_arg_dat(p_du,0,ppedge,1,"float",OP_INC), op_arg_gbl(&beta,1,"float",OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop_update("update",nodes, op_arg_dat(p_r,-1,OP_ID,1,"float",OP_READ), op_arg_dat(p_du,-1,OP_ID,1,"float",OP_RW), op_arg_dat(p_u,-1,OP_ID,1,"float",OP_INC), op_arg_gbl(&u_sum,1,"float",OP_INC), 
op_arg_gbl(&u_max,1,"float",OP_MAX)); op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } // print out results op_printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u, u); for (int pass=0; pass<1; pass++) { for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) op_printf(" %7.4f",u[i-1 + (j-1)*(NN-1)]); else if (pass==1) op_printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) op_printf(" %7.4f",r[i-1 + (j-1)*(NN-1)]); } op_printf("\n"); } op_printf("\n"); } op_timing_output(); int result = check_result<float>(u, NN, TOLERANCE); op_exit(); free(pp); free(A); free(u); free(du); free(r); return result; }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; // read in grid printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; 
qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation printf("OP init\n"); op_init(argc,argv,7); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_tuner *OP_tuner; op_diagnostic_output(); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE), NULL); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE), NULL); // 
calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pecell,4,"float",OP_READ), op_arg_dat(p_q, 1,pecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"float",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pecell,4,"float",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"float",OP_INC ), NULL); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"float",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), NULL); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ), op_arg_gbl(&rms,1,"float",OP_INC), NULL); } // print iteration history rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); }
// // main program // int main(int argc, char **argv){ int my_rank; int comm_size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; double time; double max_time; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int niter; double rms; op_timers(&cpu_t1, &wall_t1); // set constants if(my_rank == MPI_ROOT )printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; // OP initialisation op_init(argc,argv,2); /**------------------------BEGIN Parallel I/O -------------------**/ char file[] = "new_grid.h5";//"new_grid-26mil.h5";//"new_grid.h5"; // declare sets, pointers, datasets and global constants - reading in from file op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); /**------------------------END Parallel I/O -----------------------**/ 
op_timers(&cpu_t2, &wall_t2); time = wall_t2-wall_t1; MPI_Reduce(&time,&max_time,1,MPI_DOUBLE, MPI_MAX,MPI_ROOT, MPI_COMM_WORLD); if(my_rank==MPI_ROOT)printf("Max total file read time = %f\n",max_time); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_writeback.h5 and new_grid.h5 to //compare two hdf5 files op_write_hdf5("new_grid_out.h5"); //partition with ParMetis //op_partition_geom(p_x); //op_partition_random(cells); //op_partition_kway(pecell); //op_partition_geomkway(p_x, pcell); //partition with PT-Scotch op_partition_ptscotch(pecell); //create halos op_halo_create(); int g_ncell = 0; int* sizes = (int *)malloc(sizeof(int)*comm_size); MPI_Allgather(&cells->size, 1, MPI_INT, sizes, 1, MPI_INT, MPI_COMM_WORLD); for(int i = 0; i<comm_size; i++)g_ncell = g_ncell + sizes[i]; free(sizes); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); niter = 1000; for(int iter=1; iter<=niter; iter++) { //save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 
0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } //print iteration history if(my_rank==MPI_ROOT) { rms = sqrt(rms/(double) g_ncell); if (iter%100 == 0) printf("%d %10.5e \n",iter,rms); } } op_timers(&cpu_t2, &wall_t2); //get results data array op_dat temp = op_mpi_get_data(p_q); //output the result dat array to files //op_write_hdf5("new_grid_out.h5"); //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 //free memory allocated to halos op_halo_destroy(); //return all op_dats, op_maps back to original element order op_partition_reverse(); //print each mpi process's timing info for each kernel op_mpi_timing_output(); //print total time for niter interations time = wall_t2-wall_t1; MPI_Reduce(&time,&max_time,1,MPI_DOUBLE, MPI_MAX,MPI_ROOT, MPI_COMM_WORLD); if(my_rank==MPI_ROOT)printf("Max total runtime = %f\n",max_time); op_exit(); MPI_Finalize(); //user mpi finalize }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); // MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode, ncell, nedge, nbedge, niter; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ((fp = fopen("new_grid.dat", "r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode, g_ncell, g_nedge, g_nbedge; check_scan( fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n", g_nnode, g_ncell, g_nedge, g_nbedge); if (my_rank == MPI_ROOT) { g_cell = (int *)malloc(4 * g_ncell * sizeof(int)); g_edge = (int *)malloc(2 * g_nedge * sizeof(int)); g_ecell = (int *)malloc(2 * g_nedge * sizeof(int)); g_bedge = (int *)malloc(2 * g_nbedge * sizeof(int)); g_becell = (int *)malloc(g_nbedge * sizeof(int)); g_bound = (int *)malloc(g_nbedge * sizeof(int)); g_x = (double *)malloc(2 * g_nnode * sizeof(double)); g_q = (double *)malloc(4 * g_ncell * sizeof(double)); g_qold = (double *)malloc(4 * g_ncell * sizeof(double)); g_res = (double *)malloc(4 * g_ncell * sizeof(double)); g_adt = (double *)malloc(g_ncell * sizeof(double)); for (int n = 0; n < g_nnode; n++) { check_scan(fscanf(fp, "%lf %lf \n", &g_x[2 * n], &g_x[2 * n + 1]), 2); } for (int n = 0; n < g_ncell; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n], &g_cell[4 * n + 1], &g_cell[4 * n + 2], &g_cell[4 * n + 3]), 4); } for (int n = 0; n < g_nedge; n++) { 
check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2 * n], &g_edge[2 * n + 1], &g_ecell[2 * n], &g_ecell[2 * n + 1]), 4); } for (int n = 0; n < g_nbedge; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2 * n], &g_bedge[2 * n + 1], &g_becell[n], &g_bound[n]), 4); } // initialise flow field and residual } fclose(fp); nnode = compute_local_size(g_nnode, comm_size, my_rank); ncell = compute_local_size(g_ncell, comm_size, my_rank); nedge = compute_local_size(g_nedge, comm_size, my_rank); nbedge = compute_local_size(g_nbedge, comm_size, my_rank); op_printf( "Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n", my_rank, nnode, ncell, nedge, nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *)malloc(4 * ncell * sizeof(int)); edge = (int *)malloc(2 * nedge * sizeof(int)); ecell = (int *)malloc(2 * nedge * sizeof(int)); bedge = (int *)malloc(2 * nbedge * sizeof(int)); becell = (int *)malloc(nbedge * sizeof(int)); bound = (int *)malloc(nbedge * sizeof(int)); x = (double *)malloc(2 * nnode * sizeof(double)); q = (double *)malloc(4 * ncell * sizeof(double)); qold = (double *)malloc(4 * ncell * sizeof(double)); res = (double *)malloc(4 * ncell * sizeof(double)); adt = (double *)malloc(ncell * sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge, nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge, nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge, nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge, nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode, nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell, ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell, ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell, ncell, 4); scatter_double_array(g_adt, adt, 
comm_size, g_ncell, ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if (my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2 - wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ op_set edges = op_decl_set(nedge, "edges"); op_set cells = op_decl_set(ncell, "cells"); op_map pecell = op_decl_map(edges, cells, 2, ecell, "pecell"); op_dat p_res = op_decl_dat(cells, 4, "double", res, "p_res"); int count; // trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", cells, pecell, NULL); op_diagnostic_output(); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // indirect reduction count = 0; op_par_loop_res_calc("res_calc", edges, op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC), op_arg_gbl(&count, 1, "int", OP_INC)); op_printf("number of edges:: %d should be: %d \n", count, g_nedge); if (count != g_nedge) op_printf("indirect reduction FAILED\n"); else op_printf("indirect reduction PASSED\n"); // direct reduction count = 0; op_par_loop_update("update", cells, op_arg_dat(p_res, -1, OP_ID, 4, "double", OP_RW), op_arg_gbl(&count, 1, "int", OP_INC)); op_printf("number of cells: %d should be: %d \n", count, g_ncell); if (count != g_ncell) op_printf("direct reduction FAILED\n"); else op_printf("direct reduction PASSED\n"); op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv){ int nnode, nedge, n, e; float dx; op_set nodes, edges; op_map ppedge; op_dat p_A, p_r, p_u, p_du, p_beta, p_u_sum, p_u_max; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); dx = 1.0f / ((float) NN); int *pp = (int *)malloc(sizeof(int)*2*nedge); float *A = (float *)malloc(sizeof(float)*nedge); float *r = (float *)malloc(sizeof(float)*nnode); float *u = (float *)malloc(sizeof(float)*nnode); float *du = (float *)malloc(sizeof(float)*nnode); /* create matrix and r.h.s., and set coordinates needed for renumbering / partitioning */ e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[n] = 0.0f; u[n] = 0.0f; du[n] = 0.0f; pp[2*e] = n; pp[2*e+1] = n; A[e] = -1.0f; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[n] += 0.25f; } else { pp[2*e] = n; pp[2*e+1] = i2-1 + (j2-1)*(NN-1); A[e] = 0.25f; e++; } } } } float u_sum, u_max, beta = 1.0f; /* OP initialisation */ op_init(argc,argv,5); /* declare sets, pointers, and datasets */ op_decl_set(&nodes,nnode, "nodes"); op_decl_set(&edges,nedge, "edges"); op_decl_map(&ppedge,&edges,&nodes,2,pp, "ppedge"); op_decl_vec(&p_A, &edges,1,sizeof(float), A, "p_A" ); op_decl_vec(&p_r, &nodes,1,sizeof(float), r, "p_r" ); op_decl_vec(&p_u, &nodes,1,sizeof(float), u, "p_u" ); op_decl_vec(&p_du,&nodes,1,sizeof(float), du, "p_du"); op_decl_gbl(&p_beta, 1,sizeof(float), &beta, "p_beta"); op_decl_gbl(&p_u_sum, 1,sizeof(float), &u_sum, "p_u_sum"); op_decl_gbl(&p_u_max, 1,sizeof(float), &u_max, "p_u_max"); alpha = 1.0f; op_decl_const(&alpha,1,sizeof(float)); op_diagnostic_output(); /* main iteration loop */ for (int iter=0; iter<NITER; iter++) { op_par_loop_4((void(*)(void*,void*,void*,void*))res,"res", &edges, op_construct_vec_arg(&p_A, OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_u, 1,&ppedge, OP_READ), 
op_construct_vec_arg(&p_du, 0,&ppedge, OP_INC), op_construct_gbl_arg(&p_beta,OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop_5((void(*)(void*,void*,void*,void*,void*))update,"update", &nodes, op_construct_vec_arg(&p_r, OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_du, OP_NONE, NULL, OP_RW), op_construct_vec_arg(&p_u, OP_NONE, NULL, OP_INC), op_construct_gbl_arg(&p_u_sum,OP_INC), op_construct_gbl_arg(&p_u_max,OP_MAX)); printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } /* print out results */ printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u); /* op_fetch_data(p_du); op_fetch_data(p_r); */ for (int pass=0; pass<1; pass++) { /* if(pass==0) printf("\narray u\n"); else if(pass==1) printf("\narray du\n"); else if(pass==2) printf("\narray r\n"); */ for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) printf(" %7.4f",u[i-1 + (j-1)*(NN-1)]); else if (pass==1) printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) printf(" %7.4f",r[i-1 + (j-1)*(NN-1)]); } printf("\n"); } printf("\n"); } op_timing_output(); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge; /**------------------------BEGIN I/O -------------------**/ char file[] = "new_grid.dat"; char file_out[] = "new_grid_out.h5"; /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen(file,"r")) == NULL) { op_printf("can't open file %s\n",file); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (float *) malloc(2*g_nnode*sizeof(float)); g_q = (float *) malloc(4*g_ncell*sizeof(float)); g_qold = (float *) malloc(4*g_ncell*sizeof(float)); g_res = (float *) malloc(4*g_ncell*sizeof(float)); g_adt = (float *) malloc( g_ncell*sizeof(float)); for (int n=0; n<g_nnode; n++){ check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2); } 
for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1); scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2); 
scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } /**------------------------END I/O -----------------------**/ /* FIXME: It's not clear to the compiler that sth. is going on behind the scenes here. Hence theses variables are reported as unused */ op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const(1,"float",&gam ); op_decl_const(1,"float",&gm1 ); op_decl_const(1,"float",&cfl ); op_decl_const(1,"float",&eps ); op_decl_const(1,"float",&mach ); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf ); op_dump_to_hdf5(file_out); op_write_const_hdf5("gam", 1,"float",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1", 1,"float",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl", 1,"float",(char *)&cfl, 
"new_grid_out.h5"); op_write_const_hdf5("eps", 1,"float",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach", 1,"float",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"float",(char *)&alpha,"new_grid_out.h5"); op_write_const_hdf5("qinf", 4,"float",(char *)qinf, "new_grid_out.h5"); //create halos - for sanity check op_halo_create(); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); int niter; float rms; // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells, 2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges, nodes, 2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges, cells, 1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes, 4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges, 1, "int", file, "p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes, 2, "float", file, "p_x"); op_dat p_q = op_decl_dat_hdf5(cells, 4, "float", file, "p_q"); op_dat p_qold = op_decl_dat_hdf5(cells, 4, "float", file, "p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells, 1, "float", file, "p_adt"); op_dat p_res = op_decl_dat_hdf5(cells, 4, "float", file, "p_res"); op_get_const_hdf5("gam", 1, "float", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "float", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "float", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "float", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "float", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "float", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "float", (char *)&qinf, "new_grid.h5"); op_decl_const2("gam", 1, "float", &gam); op_decl_const2("gm1", 1, "float", &gm1); op_decl_const2("cfl", 1, "float", &cfl); op_decl_const2("eps", 1, "float", &eps); op_decl_const2("mach", 1, "float", &mach); op_decl_const2("alpha", 1, "float", &alpha); 
op_decl_const2("qinf", 4, "float", qinf); if (op_is_root()) op_diagnostic_output(); // trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); // op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for (int iter = 1; iter <= niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln", cells, op_arg_dat(p_q, -1, OP_ID, 4, "float", OP_READ), op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_WRITE)); // predictor/corrector update loop for (int k = 0; k < 2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc", cells, op_arg_dat(p_x, 0, pcell, 2, "float", OP_READ), op_arg_dat(p_x, 1, pcell, 2, "float", OP_READ), op_arg_dat(p_x, 2, pcell, 2, "float", OP_READ), op_arg_dat(p_x, 3, pcell, 2, "float", OP_READ), op_arg_dat(p_q, -1, OP_ID, 4, "float", OP_READ), op_arg_dat(p_adt, -1, OP_ID, 1, "float", OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc", edges, op_arg_dat(p_x, 0, pedge, 2, "float", OP_READ), op_arg_dat(p_x, 1, pedge, 2, "float", OP_READ), op_arg_dat(p_q, 0, pecell, 4, "float", OP_READ), op_arg_dat(p_q, 1, pecell, 4, "float", OP_READ), op_arg_dat(p_adt, 0, pecell, 1, "float", OP_READ), op_arg_dat(p_adt, 1, pecell, 1, "float", OP_READ), op_arg_dat(p_res, 0, pecell, 4, "float", OP_INC), op_arg_dat(p_res, 1, pecell, 4, "float", OP_INC)); op_par_loop_bres_calc("bres_calc", bedges, op_arg_dat(p_x, 0, pbedge, 2, "float", OP_READ), op_arg_dat(p_x, 1, pbedge, 2, "float", OP_READ), op_arg_dat(p_q, 0, pbecell, 4, "float", OP_READ), op_arg_dat(p_adt, 0, pbecell, 1, "float", OP_READ), op_arg_dat(p_res, 0, pbecell, 4, "float", OP_INC), op_arg_dat(p_bound, -1, OP_ID, 1, "int", OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update", cells, op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_READ), op_arg_dat(p_q, -1, OP_ID, 
4, "float", OP_WRITE), op_arg_dat(p_res, -1, OP_ID, 4, "float", OP_RW), op_arg_dat(p_adt, -1, OP_ID, 1, "float", OP_READ), op_arg_gbl(&rms, 1, "float", OP_INC)); } // print iteration history rms = sqrtf(rms / (float)g_ncell); if (iter % 100 == 0) op_printf(" %d %10.5e \n", iter, rms); if (iter % 1000 == 0 && g_ncell == 720000) { // defailt mesh -- for validation testing op_printf(" %d %3.16f \n", iter, rms); float diff = fabsf((100.0 * (rms / 0.000105987)) - 100.0); op_printf("\n\nTest problem with %d cells is within %3.15E %% of the " "expected solution\n", 720000, diff); if (diff < 0.1) { op_printf("This test is considered PASSED\n"); } else { op_printf("This test is considered FAILED\n"); } } } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n", wall_t2 - wall_t1); op_exit(); }
/*
 * Variant of op_init() that first stores `soa` in the global OP_auto_soa
 * and then forwards argc, argv and diags to op_init() unchanged.
 */
void op_init_soa(int argc, char **argv, int diags, int soa)
{
  /* set before op_init() is called, matching the original statement order */
  OP_auto_soa = soa;

  op_init(argc, argv, diags);
}
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p 
= 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), 
op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double) op_get_size(cells)); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //output the result dat array to files op_print_dat_to_txtfile(p_q, "out_grid_seq.dat"); //ASCI op_print_dat_to_binfile(p_q, "out_grid_seq.bin"); //Binary op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; char file[] = "new_grid.h5";//"new_grid-26mil.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old 
flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_map m_test = op_decl_map_hdf5(cells, nodes,4, file, "m_test"); if (m_test == NULL) printf("m_test not found\n"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_dat p_test = op_decl_dat_hdf5(cells ,4,"double",file,"p_test"); if (p_test == NULL) printf("p_test not found\n"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const2("gam",1,"double",&gam); 
op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_dump_to_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); //op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), 
op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //write given op_dat's indicated segment of data to a memory block in the order it was originally //arranged (i.e. before partitioning and reordering) double* q = (double *)op_malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_idx(p_q, q, 0, op_get_size(cells)-1); free(q); //write given op_dat's data to hdf5 file in the order it was originally arranged (i.e. before partitioning and reordering) op_fetch_data_hdf5_file(p_q, "file_name.h5"); //printf("Root process = %d\n",op_is_root()); //output the result dat array to files //op_dump_to_hdf5("new_grid_out.h5"); //writes data as it is held on each process (under MPI) //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //write back 
original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_write_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, -4,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, -2,pedge, 2,"double",OP_READ), op_arg_dat(p_q, -2,pecell,4,"double",OP_READ), op_arg_dat(p_adt, -2,pecell,1,"double",OP_READ), op_arg_dat(p_res, -2,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, -2,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; 
op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc,char *argv[]) { int *becell; int *ecell; int *bound; int *bedge; int *edge; int *cell; float *x; float *q; float *qold; float *adt; float *res; int nnode; int ncell; int nedge; int nbedge; int niter; float rms; if (argc != 2) { printf("Usage: airfoil <grid>\n"); exit(1); } // read in grid printf("reading in grid \n"); char *grid = argv[1]; FILE *fp; if ((fp = fopen(grid,"r")) == 0L) { printf("can\'t open file %s\n",grid); exit((-1)); } if (fscanf(fp,"%d %d %d %d \n",&nnode,&ncell,&nedge,&nbedge) != 4) { printf("error reading from %s\n",grid); exit((-1)); } cell = ((int *)(malloc(((4 * ncell) * (sizeof(int )))))); edge = ((int *)(malloc(((2 * nedge) * (sizeof(int )))))); ecell = ((int *)(malloc(((2 * nedge) * (sizeof(int )))))); bedge = ((int *)(malloc(((2 * nbedge) * (sizeof(int )))))); becell = ((int *)(malloc((nbedge * (sizeof(int )))))); bound = ((int *)(malloc((nbedge * (sizeof(int )))))); x = ((float *)(malloc(((2 * nnode) * (sizeof(float )))))); q = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); qold = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); res = ((float *)(malloc(((4 * ncell) * (sizeof(float )))))); adt = ((float *)(malloc((ncell * (sizeof(float )))))); for (int n = 0; n < nnode; n++) { if (fscanf(fp,"%f %f \n",(x + (2 * n)),(x + ((2 * n) + 1))) != 2) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",(cell + (4 * n)),(cell + ((4 * n) + 1)),(cell + ((4 * n) + 2)),(cell + ((4 * n) + 3))) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",(edge + (2 * n)),(edge + ((2 * n) + 1)),(ecell + (2 * n)),(ecell + ((2 * n) + 1))) != 4) { printf("error reading from new_grid.dat\n"); exit((-1)); } } for (int n = 0; n < nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",(bedge + (2 * n)),(bedge + ((2 * n) + 1)),(becell + n),(bound + n)) != 4) { printf("error reading from 
new_grid.dat\n"); exit((-1)); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = (gam - 1.0f); cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = ((3.0f * atan(1.0f)) / 45.0f); float p = 1.0f; float r = 1.0f; float u = (sqrt(((gam * p) / r)) * mach); float e = ((p / (r * gm1)) + ((0.5f * u) * u)); qinf[0] = r; qinf[1] = (r * u); qinf[2] = 0.0f; qinf[3] = (r * e); for (int n = 0; n < ncell; n++) { for (int m = 0; m < 4; m++) { q[(4 * n) + m] = qinf[m]; res[(4 * n) + m] = 0.0f; } } // OP initialisation op_init(argc,argv,2); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode,"nodes"); op_set edges = op_decl_set(nedge,"edges"); op_set bedges = op_decl_set(nbedge,"bedges"); op_set cells = op_decl_set(ncell,"cells"); op_map pedge = op_decl_map(edges,nodes,2,edge,"pedge"); op_map pecell = op_decl_map(edges,cells,2,ecell,"pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge,"pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells,nodes,4,cell,"pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int",bound,"p_bound"); op_dat p_x = op_decl_dat(nodes,2,"float",x,"p_x"); op_dat p_q = op_decl_dat(cells,4,"float",q,"p_q"); op_dat p_qold = op_decl_dat(cells,4,"float",qold,"p_qold"); op_dat p_adt = op_decl_dat(cells,1,"float",adt,"p_adt"); op_dat p_res = op_decl_dat(cells,4,"float",res,"p_res"); op_decl_const(1,"float",&gam); op_decl_const(1,"float",&gm1); op_decl_const(1,"float",&cfl); op_decl_const(1,"float",&eps); op_decl_const(1,"float",&mach); op_decl_const(1,"float",&alpha); op_decl_const(4,"float",qinf); op_diagnostic_output(); // main time-marching loop niter = 1000; for (int iter = 1; iter <= niter; iter++) { // save old flow solution save_soln_host("save_soln_modified",cells,op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_WRITE)); // predictor/corrector 
update loop for (int k = 0; k < 2; k++) { // calculate area/timstep adt_calc_host("adt_calc_modified",cells,op_arg_dat(p_x,0,pcell,2,"float",OP_READ),op_arg_dat(p_x,1,pcell,2,"float",OP_READ),op_arg_dat(p_x,2,pcell,2,"float",OP_READ),op_arg_dat(p_x,3,pcell,2,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_WRITE)); // calculate flux residual res_calc_host("res_calc_modified",edges,op_arg_dat(p_x,0,pedge,2,"float",OP_READ),op_arg_dat(p_x,1,pedge,2,"float",OP_READ),op_arg_dat(p_q,0,pecell,4,"float",OP_READ),op_arg_dat(p_q,1,pecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pecell,1,"float",OP_READ),op_arg_dat(p_adt,1,pecell,1,"float",OP_READ),op_arg_dat(p_res,0,pecell,4,"float",OP_INC),op_arg_dat(p_res,1,pecell,4,"float",OP_INC)); bres_calc_host("bres_calc_modified",bedges,op_arg_dat(p_x,0,pbedge,2,"float",OP_READ),op_arg_dat(p_x,1,pbedge,2,"float",OP_READ),op_arg_dat(p_q,0,pbecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pbecell,1,"float",OP_READ),op_arg_dat(p_res,0,pbecell,4,"float",OP_INC),op_arg_dat(p_bound,(-1), OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; update_host("update_modified",cells,op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_WRITE),op_arg_dat(p_res,(-1), OP_ID,4,"float",OP_RW),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_READ),op_arg_gbl(&rms,1,"float",OP_INC)); } // print iteration history rms = (sqrt((rms / ((float )ncell)))); if ((iter % 100) == 0) printf(" %d %10.5e \n",iter,rms); } /* for (int ll = 0; ll < (4 * ncell); ll++) printf("%lf\n",q[ll]);*/ op_timing_output(); return 0; }
int main(int argc, char *argv[]){ int *becell, *ecell, *bound, *bedge, *edge, *cell; REAL *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; REAL rms; if (argc != 2) { printf("Usage: airfoil <grid>\n"); exit(1); } // read in grid printf("reading in grid \n"); char* grid = argv[1]; FILE *fp; if ( (fp = fopen(grid,"r")) == NULL) { printf("can't open file %s\n", grid); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from %s\n", grid); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (REAL *) malloc(2*nnode*sizeof(REAL)); q = (REAL *) malloc(4*ncell*sizeof(REAL)); qold = (REAL *) malloc(4*ncell*sizeof(REAL)); res = (REAL *) malloc(4*ncell*sizeof(REAL)); adt = (REAL *) malloc( ncell*sizeof(REAL)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; REAL mach = 0.4f; REAL alpha = 3.0f*atan(1.0f)/45.0f; REAL p = 1.0f; REAL r = 1.0f; REAL u = sqrt(gam*p/r)*mach; REAL e = p/(r*gm1) + 0.5f*u*u; 
qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation op_init(argc,argv,2); op_tuner* global_tuner = op_create_global_tuner(); global_tuner->op_warpsize = 1; global_tuner->block_size = 64; global_tuner->part_size = 128; global_tuner->cache_line_size = 128; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,REAL_STRING,x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,REAL_STRING,q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,REAL_STRING,qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,REAL_STRING,adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,REAL_STRING,res ,"p_res"); op_decl_const(1,REAL_STRING,&gam ); op_decl_const(1,REAL_STRING,&gm1 ); op_decl_const(1,REAL_STRING,&cfl ); op_decl_const(1,REAL_STRING,&eps ); op_decl_const(1,REAL_STRING,&mach ); op_decl_const(1,REAL_STRING,&alpha); op_decl_const(4,REAL_STRING,qinf ); op_tuner* save_soln_tuner = op_create_tuner("save_soln"); save_soln_tuner->part_size = 64; save_soln_tuner->block_size = 4; op_tuner* adt_calc_tuner = op_create_tuner("adt_calc"); adt_calc_tuner->part_size = 64; adt_calc_tuner->block_size = 4; op_tuner* res_calc_tuner = op_create_tuner("res_calc"); res_calc_tuner->part_size = 64; res_calc_tuner->block_size = 4; op_tuner* bres_calc_tuner = op_create_tuner("bres_calc"); bres_calc_tuner->part_size = 64; 
bres_calc_tuner->block_size = 4; op_tuner* update_tuner = op_create_tuner("update"); update_tuner->part_size = 64; update_tuner->block_size = 4; op_diagnostic_output(); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_WRITE), save_soln_tuner); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 1,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 2,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_x, 3,pcell, 2,REAL_STRING,OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,REAL_STRING,OP_WRITE), adt_calc_tuner); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_x, 1,pedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_q, 0,pecell,4,REAL_STRING,OP_READ), op_arg_dat(p_q, 1,pecell,4,REAL_STRING,OP_READ), op_arg_dat(p_adt, 0,pecell,1,REAL_STRING,OP_READ), op_arg_dat(p_adt, 1,pecell,1,REAL_STRING,OP_READ), op_arg_dat(p_res, 0,pecell,4,REAL_STRING,OP_INC ), op_arg_dat(p_res, 1,pecell,4,REAL_STRING,OP_INC ), res_calc_tuner); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_x, 1,pbedge, 2,REAL_STRING,OP_READ), op_arg_dat(p_q, 0,pbecell,4,REAL_STRING,OP_READ), op_arg_dat(p_adt, 0,pbecell,1,REAL_STRING,OP_READ), op_arg_dat(p_res, 0,pbecell,4,REAL_STRING,OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), bres_calc_tuner); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,REAL_STRING,OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,REAL_STRING,OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 
1,REAL_STRING,OP_READ ), op_arg_gbl(&rms,1,REAL_STRING,OP_INC), update_tuner); } // print iteration history rms = sqrt(rms/(REAL) ncell); if ( iter % 100 == 0 ) printf(" %d %10.5e \n",iter,rms); } for ( int ll = 0; ll < 4*ncell; ll++ ) { printf ( "%lf\n", q[ll] ); } op_timing_output(); }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int niter; double rms; // set constants and initialise flow field and residual printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; // OP initialisation op_init(argc,argv,2); char file[] = "new_grid.h5";//"new_grid-26mil.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_decl_const2("gam",1,"double",&gam ); op_decl_const2("gm1",1,"double",&gm1 ); op_decl_const2("cfl",1,"double",&cfl ); op_decl_const2("eps",1,"double",&eps ); op_decl_const2("mach",1,"double",&mach ); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf ); op_diagnostic_output(); niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln", cells, 
op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double) cells->size); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; op_set nodes, edges, bedges, cells; op_map pedge, pecell, pbedge, pbecell, pcell; op_dat p_x, p_q, p_qold, p_res, p_adt, p_bound, p_rms; /* read in grid */ printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); /* set constants and initialise flow field and residual */ printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; float mach = 0.4f; float alpha = 3.0f*atan(1.0f)/45.0f; 
float p = 1.0f; float r = 1.0f; float u = sqrt(gam*p/r)*mach; float e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } /* OP initialisation */ op_init(argc,argv,2); /* declare sets, pointers, datasets and global constants */ op_decl_set(&nodes, nnode, "nodes"); op_decl_set(&edges, nedge, "edges"); op_decl_set(&bedges,nbedge, "bedges"); op_decl_set(&cells, ncell, "cells"); op_decl_map(&pedge, &edges, &nodes,2,edge, "pedge"); op_decl_map(&pecell, &edges, &cells,2,ecell, "pecell"); op_decl_map(&pbedge, &bedges,&nodes,2,bedge, "pbedge"); op_decl_map(&pbecell,&bedges,&cells,1,becell,"pbecell"); op_decl_map(&pcell, &cells, &nodes,4,cell, "pcell"); op_decl_vec(&p_bound,&bedges,1,sizeof(int),bound,"p_bound"); op_decl_vec(&p_x ,&nodes ,2,sizeof(float),x ,"p_x"); op_decl_vec(&p_q ,&cells ,4,sizeof(float),q ,"p_q"); op_decl_vec(&p_qold ,&cells ,4,sizeof(float),qold ,"p_qold"); op_decl_vec(&p_adt ,&cells ,1,sizeof(float),adt ,"p_adt"); op_decl_vec(&p_res ,&cells ,4,sizeof(float),res ,"p_res"); op_decl_gbl(&p_rms ,1,sizeof(float),&rms ,"p_rms"); op_decl_const(&gam, 1, sizeof(float)); op_decl_const(&gm1, 1, sizeof(float)); op_decl_const(&cfl, 1, sizeof(float)); op_decl_const(&eps, 1, sizeof(float)); op_decl_const(&mach, 1, sizeof(float)); op_decl_const(&alpha,1, sizeof(float)); op_decl_const(qinf, 4, sizeof(float)); op_diagnostic_output(); /* main time-marching loop */ niter = 1000; for(int iter=1; iter<=niter; iter++) { /* save old flow solution */ op_par_loop_2((void(*)(void*,void*))save_soln,"save_soln", &cells, op_construct_vec_arg(&p_q, OP_NONE, NULL,OP_READ ), op_construct_vec_arg(&p_qold,OP_NONE, NULL,OP_WRITE)); /* predictor/corrector update loop */ for(int k=0; k<2; k++) { /* calculate area/timstep */ op_par_loop_3((void(*)(void*,void*,void*))adt_calc,"adt_calc",&cells, op_construct_vec_arg(&p_x, OP_ALL, &pcell, OP_READ ), 
op_construct_vec_arg(&p_q, OP_NONE, NULL, OP_READ ), op_construct_vec_arg(&p_adt, OP_NONE, NULL, OP_WRITE)); /* calculate flux residual */ op_par_loop_4((void(*)(void*,void*,void*,void*))res_calc,"res_calc",&edges, op_construct_vec_arg(&p_x, OP_ALL,&pedge, OP_READ), op_construct_vec_arg(&p_q, OP_ALL,&pecell,OP_READ), op_construct_vec_arg(&p_adt, OP_ALL,&pecell,OP_READ), op_construct_vec_arg(&p_res, OP_ALL,&pecell,OP_INC)); op_par_loop_5((void(*)(void*,void*,void*,void*,void*))bres_calc,"bres_calc",&bedges, op_construct_vec_arg(&p_x, OP_ALL,&pbedge, OP_READ), op_construct_vec_arg(&p_q, 0,&pbecell,OP_READ), op_construct_vec_arg(&p_adt, 0,&pbecell,OP_READ), op_construct_vec_arg(&p_res, 0,&pbecell,OP_INC), op_construct_vec_arg(&p_bound,OP_NONE, NULL,OP_READ)); /* update flow field */ rms = 0.0; op_par_loop_5((void(*)(void*,void*,void*,void*,void*))update,"update",&cells, op_construct_vec_arg(&p_qold,OP_NONE, NULL, OP_READ), op_construct_vec_arg(&p_q, OP_NONE, NULL, OP_WRITE), op_construct_vec_arg(&p_res, OP_NONE, NULL, OP_RW), op_construct_vec_arg(&p_adt, OP_NONE, NULL, OP_READ), op_construct_gbl_arg(&p_rms, OP_INC)); } /* print iteration history */ rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *bnode, *cell; double *xm;//, *q; int nnode,ncell,nbnodes,niter; double rms = 1; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0/gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = 
-0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf*minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; char file[] = "FE_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set bnodes = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pbnodes = op_decl_map_hdf5(bnodes,nodes,1,file, "pbedge"); op_map pcell = op_decl_map_hdf5(cells, nodes,4,file, "pcell"); op_dat p_xm = op_decl_dat_hdf5(nodes ,2,"double", file, "p_x"); op_dat p_phim = op_decl_dat_hdf5(nodes, 1, "double", file, "p_phim"); op_dat p_resm = op_decl_dat_hdf5(nodes, 1, "double", file, "p_resm"); op_dat p_K = op_decl_dat_hdf5(cells, 16, "double:soa",file, "p_K"); op_dat p_V = op_decl_dat_hdf5(nodes, 1, "double", file, "p_V"); op_dat p_P = op_decl_dat_hdf5(nodes, 1, "double", file, "p_P"); op_dat p_U = op_decl_dat_hdf5(nodes, 1, "double", file, "p_U"); op_decl_const2("gam",1,"double",&gam ); op_decl_const2("gm1",1,"double",&gm1 ); op_decl_const2("gm1i",1,"double",&gm1i ); op_decl_const2("m2",1,"double",&m2 ); op_decl_const2("wtg1",2,"double",wtg1 ); op_decl_const2("xi1",2,"double",xi1 ); op_decl_const2("Ng1",4,"double",Ng1 ); op_decl_const2("Ng1_xi",4,"double",Ng1_xi ); op_decl_const2("wtg2",4,"double",wtg2 ); op_decl_const2("Ng2",16,"double",Ng2 ); op_decl_const2("Ng2_xi",32,"double",Ng2_xi ); op_decl_const2("minf",1,"double",&minf ); op_decl_const2("freq",1,"double",&freq ); op_decl_const2("kappa",1,"double",&kappa ); op_decl_const2("nmode",1,"double",&nmode ); op_decl_const2("mfan",1,"double",&mfan ); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, p_xm); op_printf("nodes: %d cells: %d bnodes: %d\n", nodes->size, cells->size, bnodes->size); nnode = op_get_size(nodes); ncell = op_get_size(cells); nbnodes = op_get_size(bnodes); double cpu_t1, cpu_t2, wall_t1, wall_t2; op_timers(&cpu_t1, 
&wall_t1); // main time-marching loop niter = 20; for(int iter=1; iter<=niter; iter++) { op_par_loop_res_calc("res_calc",cells, op_arg_dat(p_xm,-4,pcell,2,"double",OP_READ), op_arg_dat(p_phim,-4,pcell,1,"double",OP_READ), op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_WRITE), op_arg_dat(p_resm,-4,pcell,1,"double",OP_INC)); op_par_loop_dirichlet("dirichlet",bnodes, op_arg_dat(p_resm,0,pbnodes,1,"double",OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; //c1 = R'*R; op_par_loop_init_cg("init_cg",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c1,1,"double",OP_INC), op_arg_dat(p_U,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_WRITE)); //set up stopping conditions double res0 = sqrt(c1); double res = res0; int iter = 0; int maxiter = 200; while (res > 0.1*res0 && iter < maxiter) { //V = Stiffness*P op_par_loop_spMV("spMV",cells, op_arg_dat(p_V,-4,pcell,1,"double",OP_INC), op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_READ), op_arg_dat(p_P,-4,pcell,1,"double",OP_READ)); op_par_loop_dirichlet("dirichlet",bnodes, op_arg_dat(p_V,0,pbnodes,1,"double",OP_WRITE)); c2 = 0; //c2 = P'*V; op_par_loop_dotPV("dotPV",nodes, op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c2,1,"double",OP_INC)); alpha = c1/c2; //U = U + alpha*P; //resm = resm-alpha*V; op_par_loop_updateUR("updateUR",nodes, op_arg_dat(p_U,-1,OP_ID,1,"double",OP_INC), op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_INC), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_RW), op_arg_gbl(&alpha,1,"double",OP_READ)); c3 = 0; //c3 = resm'*resm; op_par_loop_dotR("dotR",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c3,1,"double",OP_INC)); beta = c3/c1; //P = beta*P+resm; op_par_loop_updateP("updateP",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_RW), 
op_arg_gbl(&beta,1,"double",OP_READ)); c1 = c3; res = sqrt(c1); iter++; } rms = 0; //phim = phim - Stiffness\Load; op_par_loop_update("update",nodes, op_arg_dat(p_phim,-1,OP_ID,1,"double",OP_RW), op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_U,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(nnode), iter); } op_timing_output(); op_timers(&cpu_t2, &wall_t2); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { int nnode = (NN+1); int *p_elem_node = (int *)malloc(2*sizeof(int)*NN); Real *p_xn = (Real *)malloc(sizeof(Real)*nnode); Real *p_x = (Real *)malloc(sizeof(Real)*nnode); Real *p_xref = (Real *)malloc(sizeof(Real)*nnode); Real *p_y = (Real *)malloc(sizeof(Real)*nnode); // create element -> node mapping for (int i = 0; i < NN; ++i) { p_elem_node[2*i] = i; p_elem_node[2*i+1] = i+1; } // create coordinates and populate x with -1/pi^2*sin(pi*x) for (int i = 0; i < nnode; ++i) { /*p_xn[i] = sin(0.5*M_PI*i/NN);*/ p_xn[i] = (Real)i/NN; p_x[i] = (1./(M_PI*M_PI))*sin(M_PI*p_xn[i]); p_xref[i] = sin(M_PI*p_xn[i]); } // OP initialisation op_init(argc,argv,2); // declare sets, pointers, and datasets op_set nodes, elements; op_map elem_node; op_dat x, y, xn, mat; op_sparsity mat_sparsity; op_decl_set(&nodes, nnode, "nodes"); op_decl_set(&elements, NN, "elements"); op_decl_map(&elem_node, &elements, &nodes, 2, p_elem_node, "elem_node"); /*dump_map(&elem_node, "map");*/ op_decl_vec(&x, &nodes, 1, sizeof(Real), p_x, "x"); op_decl_vec(&y, &nodes, 1, sizeof(Real), p_y, "y"); op_decl_vec(&xn, &nodes, 1, sizeof(Real), p_xn, "xn"); op_decl_sparsity(&mat_sparsity, &elem_node, &elem_node); /*dump_sparsity(&mat_sparsity, "sparsity");*/ op_decl_mat(&mat, &nodes, &nodes, 1, sizeof(Real), &mat_sparsity, "matrix"); /*dump_dat(&mat, "matrix");*/ op_diagnostic_output(); // Fix the values of the boundary nodes to get a unique solution Real val = 1e308; int idx = 0; op_mat_addto(&mat, &val, 1, &idx, 1, &idx); idx = NN; op_mat_addto(&mat, &val, 1, &idx, 1, &idx); // construct the matrix op_par_loop_2((void(*)(void*,void*))laplace, "laplace", &elements, op_construct_mat_arg(&mat, OP_ALL, &elem_node, OP_ALL, &elem_node, OP_INC), op_construct_vec_arg(&xn, 0, &elem_node, OP_READ)); // spmv /*op_mat_mult(&mat, &x, &y);*/ // solve op_solve(&mat, &x, &y); for (int i = 0; i < nnode; ++i) { printf("%f\n", p_x[i]); } op_exit(); }
// attempt to allocate a new operator from the static memory pool, return index s16 net_add_op(op_id_t opId) { u16 ins, outs; int i, j; int idxOld, idxNew; op_t* op; s32 numInsSave = net->numIns; s32 numOutsSave = net->numOuts; print_dbg("\r\n adding operator; old input count: "); print_dbg_ulong(numInsSave); if (net->numOps >= NET_OPS_MAX) { return -1; } print_dbg(" , op class: "); print_dbg_ulong(opId); print_dbg(" , size: "); print_dbg_ulong(op_registry[opId].size); if (op_registry[opId].size > NET_OP_POOL_SIZE - net->opPoolOffset) { print_dbg("\r\n op creation failed; op memory pool is exhausted."); return -1; } print_dbg(" ; allocating... "); op = (op_t*)((u8*)net->opPool + net->opPoolOffset); // use the class ID to initialize a new object in scratch print_dbg(" ; initializing... "); op_init(op, opId); ins = op->numInputs; outs = op->numOutputs; if (ins > (NET_INS_MAX - net->numIns)) { print_dbg("\r\n op creation failed; too many inputs in network."); return -1; } if (outs > (NET_OUTS_MAX - net->numOuts)) { print_dbg("\r\n op creation failed; too many outputs in network."); return -1; } // add op pointer to list net->ops[net->numOps] = op; // advance offset for next allocation net->opPoolOffset += op_registry[opId].size; //---- add inputs and outputs to node list for(i=0; i<ins; ++i) { net->ins[net->numIns].opIdx = net->numOps; net->ins[net->numIns].opInIdx = i; ++(net->numIns); } for(i=0; i<outs; i++) { net->outs[net->numOuts].opIdx = net->numOps; net->outs[net->numOuts].opOutIdx = i; net->outs[net->numOuts].target = -1; ++(net->numOuts); } if(net->numOps > 0) { // if we added input nodes, need to adjust connections to DSP params for(i=0; i < numOutsSave; i++) { /* print_dbg("\r\n checking output no. 
"); */ /* print_dbg_ulong(i); */ /* print_dbg(" ; target: "); */ /* print_dbg_ulong(net->outs[i].target); */ if(net->outs[i].target >= numInsSave) { /* print_dbg("\r\n adjusting target after op creation; old op count: "); */ /* print_dbg_ulong(net->numOps); */ /* print_dbg(" , output index: "); */ /* print_dbg_ulong(i); */ /* print_dbg(" , current target "); */ /* print_dbg_ulong(net->outs[i].target); */ /* print_dbg(" , count of inputs in new op: "); */ /* print_dbg_ulong(ins); */ // preset target, add offset for new inputs net_connect(i, net->outs[i].target + ins); } /// do the same in all presets! for(j=0; j<NET_PRESETS_MAX; j++) { if(preset_out_enabled(j, i)) { s16 tar = presets[j].outs[i].target; if(tar >= numInsSave) { tar = tar + ins; presets[j].outs[i].target = tar; } } } // preset loop } // outs loop for(i=0; i<NET_PRESETS_MAX; i++) { // shift parameter nodes in preset data for(j=net->numParams - 1; j>=0; j--) { // this was the old param index idxOld = j + numInsSave; // copy to new param index idxNew = idxOld + ins; if(idxNew >= PRESET_INODES_COUNT) { print_dbg("\r\n out of preset input nodes in new op creation! "); continue; } else { presets[i].ins[idxNew].value = presets[i].ins[idxOld].value; presets[i].ins[idxNew].enabled = presets[i].ins[idxOld].enabled; // clear the old data. it may correspond to new operator inputs. presets[i].ins[idxOld].enabled = 0; presets[i].ins[idxOld].value = 0; } } } } ++(net->numOps); return net->numOps - 1; }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *bnode, *cell, *g_bnode, *g_cell; double *xm, *g_xm;; int nnode,ncell,nbnodes,niter, g_nnode, g_ncell, g_nbnodes; double rms = 1; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("FE_grid.dat","r")) == NULL) { op_printf("can't open file FE_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d \n",&g_nnode, &g_ncell, &g_nbnodes) != 3) { op_printf("error reading from new_grid.dat\n"); exit(-1); } if (my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_bnode = (int *) malloc(g_nbnodes*sizeof(int)); g_xm = (double *) malloc(2*g_nnode*sizeof(double)); for (int n=0; n<g_nnode; n++) { if (fscanf(fp,"%lf %lf \n",&g_xm[2*n], &g_xm[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<g_nbnodes; n++) { if (fscanf(fp,"%d \n",&g_bnode[n]) != 1) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nbnodes = compute_local_size (g_nbnodes, comm_size, my_rank); cell = (int *) malloc(4*ncell*sizeof(int)); bnode = (int *) malloc(nbnodes*sizeof(int)); xm = (double *) malloc(2*nnode*sizeof(double)); scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes,nbnodes, 1); scatter_double_array(g_xm, xm, comm_size, g_nnode,nnode, 2); if(my_rank == MPI_ROOT) { free(g_cell); free(g_xm); free(g_bnode); } // set constants and initialise flow field and residual 
op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0/gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = -0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf*minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; double *phim = (double 
*)malloc(nnode*sizeof(double)); memset(phim,0,nnode*sizeof(double)); for (int i = 0;i<nnode;i++) { phim[i] = minf*xm[2*i]; } double *K = (double *)malloc(4*4*ncell*sizeof(double)); memset(K,0,4*4*ncell*sizeof(double)); double *resm = (double *)malloc(nnode*sizeof(double)); memset(resm,0,nnode*sizeof(double)); double *V = (double *)malloc(nnode*sizeof(double)); memset(V,0,nnode*sizeof(double)); double *P = (double *)malloc(nnode*sizeof(double)); memset(P,0,nnode*sizeof(double)); double *U = (double *)malloc(nnode*sizeof(double)); memset(U,0,nnode*sizeof(double)); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set bnodes = op_decl_set(nbnodes, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pbnodes = op_decl_map(bnodes,nodes,1,bnode, "pbedge"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_xm = op_decl_dat(nodes ,2,"double",xm ,"p_x"); op_dat p_phim = op_decl_dat(nodes, 1, "double", phim, "p_phim"); op_dat p_resm = op_decl_dat(nodes, 1, "double", resm, "p_resm"); op_dat p_K = op_decl_dat(cells, 16, "double:soa", K, "p_K"); op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V"); op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P"); op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&gm1i ); op_decl_const(1,"double",&m2 ); op_decl_const(2,"double",wtg1 ); op_decl_const(2,"double",xi1 ); op_decl_const(4,"double",Ng1 ); op_decl_const(4,"double",Ng1_xi ); op_decl_const(4,"double",wtg2 ); op_decl_const(16,"double",Ng2 ); op_decl_const(32,"double",Ng2_xi ); op_decl_const(1,"double",&minf ); op_decl_const(1,"double",&freq ); op_decl_const(1,"double",&kappa ); op_decl_const(1,"double",&nmode ); op_decl_const(1,"double",&mfan ); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL); // main time-marching loop niter = 20; //initialise timers for total execution wall 
time op_timers(&cpu_t1, &wall_t1); for(int iter=1; iter<=niter; iter++) { op_par_loop(res_calc,"res_calc",cells, op_arg_dat(p_xm, -4, pcell, 2,"double",OP_READ), op_arg_dat(p_phim, -4, pcell, 1,"double",OP_READ), op_arg_dat(p_K, -1, OP_ID, 16,"double:soa",OP_WRITE), op_arg_dat(p_resm, -4, pcell, 1,"double",OP_INC) ); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_resm, 0, pbnodes, 1,"double",OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; //c1 = R'*R; op_par_loop(init_cg, "init_cg", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c1, 1, "double", OP_INC), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE)); //set up stopping conditions double res0 = sqrt(c1); double res = res0; int inner_iter = 0; int maxiter = 200; while (res > 0.1*res0 && inner_iter < maxiter) { //V = Stiffness*P op_par_loop(spMV, "spMV", cells, op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC), op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ), op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ)); op_par_loop(dirichlet,"dirichlet",bnodes, op_arg_dat(p_V, 0, pbnodes, 1,"double",OP_WRITE)); c2 = 0; //c2 = P'*V; op_par_loop(dotPV, "dotPV", nodes, op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c2, 1, "double", OP_INC)); alpha = c1/c2; //U = U + alpha*P; //resm = resm-alpha*V; op_par_loop(updateUR, "updateUR", nodes, op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&alpha, 1, "double", OP_READ)); c3 = 0; //c3 = resm'*resm; op_par_loop(dotR, "dotR", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c3, 1, "double", OP_INC)); beta = c3/c1; //P = beta*P+resm; op_par_loop(updateP, 
"updateP", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&beta, 1, "double", OP_READ)); c1 = c3; res = sqrt(c1); inner_iter++; } rms = 0; //phim = phim - Stiffness\Load; op_par_loop(update, "update", nodes, op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&rms, 1, "double", OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(g_nnode), inner_iter); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); /*free(cell); free(bnode); free(xm); free(phim); free(K); free(resm); free(V); free(P); free(U);*/ }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *pp; double *A, *r, *u, *du; int nnode, nedge; /**------------------------BEGIN I/O and PARTITIONING ---------------------**/ int g_nnode, g_nedge, g_n, g_e; g_nnode = (NN-1)*(NN-1); g_nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); int *g_pp = 0; double *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0; op_printf("Global number of nodes, edges = %d, %d\n",g_nnode,g_nedge); if(my_rank == MPI_ROOT) { g_pp = (int *)malloc(sizeof(int)*2*g_nedge); g_A = (double *)malloc(sizeof(double)*g_nedge); g_r = (double *)malloc(sizeof(double)*g_nnode); g_u = (double *)malloc(sizeof(double)*g_nnode); g_du = (double *)malloc(sizeof(double)*g_nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning g_e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { g_n = i-1 + (j-1)*(NN-1); g_r[g_n] = 0.0f; g_u[g_n] = 0.0f; g_du[g_n] = 0.0f; g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = g_n; g_A[g_e] = -1.0f; g_e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { g_r[g_n] += 0.25f; } else { g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = i2-1 + (j2-1)*(NN-1); g_A[g_e] = 0.25f; g_e++; } } } } } /* Compute local sizes */ nnode = compute_local_size (g_nnode, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); op_printf("Number of nodes, edges on process %d = %d, %d\n" ,my_rank,nnode,nedge); /*Allocate memory to hold local sets, mapping tables and data*/ pp = (int *)malloc(2*sizeof(int)*nedge); A = (double *) malloc(nedge*sizeof(double)); r = (double *) malloc(nnode*sizeof(double)); u = (double *) malloc(nnode*sizeof(double)); du = (double *) 
malloc(nnode*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_pp, pp, comm_size, g_nedge,nedge, 2); scatter_double_array(g_A, A, comm_size, g_nedge,nedge, 1); scatter_double_array(g_r, r, comm_size, g_nnode,nnode, 1); scatter_double_array(g_u, u, comm_size, g_nnode,nnode, 1); scatter_double_array(g_du, du, comm_size, g_nnode,nnode, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_pp); free(g_A); free(g_r); free(g_u); free(g_du); } /**------------------------END I/O and PARTITIONING ---------------------**/ // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode,"nodes"); op_set edges = op_decl_set(nedge,"edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,1,"double", A, "p_A" ); op_dat p_r = op_decl_dat(nodes,1,"double", r, "p_r" ); op_dat p_u = op_decl_dat(nodes,1,"double", u, "p_u" ); op_dat p_du = op_decl_dat(nodes,1,"double", du,"p_du"); alpha = 1.0f; op_decl_const(1,"double",&alpha); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main iteration loop double u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop(res,"res", edges, op_arg_dat(p_A, -1,OP_ID, 1,"double", OP_READ), op_arg_dat(p_u, 1,ppedge, 1,"double", OP_READ), op_arg_dat(p_du, 0,ppedge, 1,"double", OP_INC), op_arg_gbl(&beta, 1,"double", OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update,"update", nodes, op_arg_dat(p_r, -1,OP_ID, 1,"double",OP_READ), op_arg_dat(p_du, -1,OP_ID, 1,"double",OP_RW), op_arg_dat(p_u, -1,OP_ID, 1,"double",OP_INC), op_arg_gbl(&u_sum,1,"double",OP_INC), op_arg_gbl(&u_max,1,"double",OP_MAX)); op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/g_nnode)); } op_timers(&cpu_t2, &wall_t2); //get results data array 
op_dat temp = op_mpi_get_data(p_u); //output the result dat array to files print_dat_tofile(temp, "out_grid.dat"); //ASCI //print_dat_tobinfile(temp, "out_grid.bin"); //Binary //print each mpi process's timing info for each kernel op_timing_output(); //print total time for niter interations op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv){ int nnode, nedge, n, e; float dx; nnode = (NN-1)*(NN-1); nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); dx = 1.0f / ((float) NN); int *pp = (int *)malloc(sizeof(int)*2*nedge); int *p1 = (int *)malloc(sizeof(int)*nedge); int *p2 = (int *)malloc(sizeof(int)*nedge); float *xe = (float *)malloc(sizeof(float)*2*nedge); float *xn = (float *)malloc(sizeof(float)*2*nnode); double *A = (double *)malloc(sizeof(double)*3*nedge); float *r = (float *)malloc(sizeof(float)*2*nnode); float *u = (float *)malloc(sizeof(float)*2*nnode); float *du = (float *)malloc(sizeof(float)*3*nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { n = i-1 + (j-1)*(NN-1); r[2*n] = 0.0f; u[2*n] = 0.0f; du[3*n] = 0.0f; xn[2*n ] = i*dx; xn[2*n+1] = j*dx; p1[e] = n; p2[e] = n; pp[2*e] = p1[e]; pp[2*e+1] = p2[e]; A[3*e] = -1.0f; xe[2*e ] = i*dx; xe[2*e+1] = j*dx; e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { r[2*n] += 0.25f; } else { p1[e] = n; p2[e] = i2-1 + (j2-1)*(NN-1); pp[2*e] = p1[e]; pp[2*e+1] = p2[e]; A[3*e] = 0.25f; xe[2*e ] = i*dx; xe[2*e+1] = j*dx; e++; } } } } // OP initialisation op_init(argc,argv,5); // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,3,"double",A, "p_A" ); op_dat p_r = op_decl_dat(nodes,2,"float", r, "p_r" ); op_dat p_u = op_decl_dat(nodes,2,"float", u, "p_u" ); op_dat p_du = op_decl_dat(nodes,3,"float", du, "p_du"); alpha = 2.0f; op_decl_const(1,"float",&alpha); alpha = 1.0f; op_decl_const(1,"float",&alpha); op_diagnostic_output(); // main iteration loop float u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { 
op_par_loop(res,"res", edges, op_arg_dat(p_A, -1,OP_ID, 3,"double",OP_READ), op_arg_dat(p_u, 1,ppedge, 2,"float", OP_READ), op_arg_dat(p_du, 0,ppedge, 3,"float", OP_INC ), op_arg_gbl(&beta,1,"float",OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update,"update", nodes, op_arg_dat(p_r, -1,OP_ID, 2,"float",OP_READ), op_arg_dat(p_du, -1,OP_ID, 3,"float",OP_RW ), op_arg_dat(p_u, -1,OP_ID, 2,"float",OP_INC ), op_arg_gbl(&u_sum,1,"float",OP_INC), op_arg_gbl(&u_max,1,"float",OP_MAX)); printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode)); } // print out results printf("\n Results after %d iterations:\n\n",NITER); op_fetch_data(p_u); /* op_fetch_data(p_du); op_fetch_data(p_r); */ for (int pass=0; pass<1; pass++) { /* if(pass==0) printf("\narray u\n"); else if(pass==1) printf("\narray du\n"); else if(pass==2) printf("\narray r\n"); */ for (int j=NN-1; j>0; j--) { for (int i=1; i<NN; i++) { if (pass==0) printf(" %7.4f",u[2*(i-1 + (j-1)*(NN-1))]); else if (pass==1) printf(" %7.4f",du[i-1 + (j-1)*(NN-1)]); else if (pass==2) printf(" %7.4f",r[2*(i-1 + (j-1)*(NN-1))]); } printf("\n"); } printf("\n"); } op_timing_output(); op_exit(); // free allocated arrays free(pp); free(A); free(r); free(u); free(du); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (double *) malloc(2*g_nnode*sizeof(double)); g_q = (double *) malloc(4*g_ncell*sizeof(double)); g_qold = (double *) malloc(4*g_ncell*sizeof(double)); g_res = (double *) malloc(4*g_ncell*sizeof(double)); g_adt = (double *) malloc( g_ncell*sizeof(double)); for (int n=0; 
n<g_nnode; n++){ check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2); } for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, 
g_nbedge,nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode,nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_double_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2-wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); //op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); //op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); //op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); // p_res, p_adt and p_qold now declared as a temp op_dats during // the execution of the time-marching loop op_decl_const2("gam",1,"double",&gam ); op_decl_const2("gm1",1,"double",&gm1 ); op_decl_const2("cfl",1,"double",&cfl ); op_decl_const2("eps",1,"double",&eps ); op_decl_const2("mach",1,"double",&mach ); 
op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf ); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", cells, pecell, p_x); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); niter = 1000; for(int iter=1; iter<=niter; iter++) { double* tmp_elem = NULL; op_dat p_res = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res"); op_dat p_adt = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt"); op_dat p_qold = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold"); //save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), 
op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } //print iteration history rms = sqrt(rms/(double) g_ncell); if (iter%100 == 0) op_printf("%d %10.5e \n",iter,rms); if (op_free_dat_temp(p_res) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name); if (op_free_dat_temp(p_adt) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name); if (op_free_dat_temp(p_qold) < 0) op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); //print total time for niter interations op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv){ int *becell, *ecell, *bound, *bedge, *edge, *cell; float *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; float rms; // read in grid printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("/work/rr908/airfoil/new_grid.dat","r")) == NULL) { printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (float *) malloc(2*nnode*sizeof(float)); q = (float *) malloc(4*ncell*sizeof(float)); qold = (float *) malloc(4*ncell*sizeof(float)); res = (float *) malloc(4*ncell*sizeof(float)); adt = (float *) malloc( ncell*sizeof(float)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); #ifdef DIAGNOSTIC print_array((float *) x, nnode, "initial_nodes"); print_array((float *) cell, ncell, "initial_cells"); FILE *flog; flog = fopen( "initial_cells_cellarray", "w" ); for( int i=0; i< ncell; ++i ) { fprintf( flog, "%d %d %d %d\n", cell[4*i], cell[4*i+1], cell[4*i+2], cell[4*i+3] ); } fclose( flog ); 
print_array((float *) edge, nedge, "initial_edges"); print_array((float *) ecell, nedge, "initiall_edges_for_cell"); print_array((float *) bedge, nbedge, "initial_border_edges"); print_array((float *) becell, nbedge, "initial_becell"); print_array((float *) bound, nbedge, "initial bound"); #endif // set constants and initialise flow field and residual printf("initialising flow field \n"); g_const.gam = 1.4f; g_const.gm1 = g_const.gam - 1.0f; g_const.cfl = 0.9f; g_const.eps = 0.05f; g_const.mach = 0.4f; g_const.alpha = 3.0f*atan(1.0f)/45.0f; float p = 1.0f; float r = 1.0f; float u = sqrt(g_const.gam*p/r)*g_const.mach; float e = p/(r*g_const.gm1) + 0.5f*u*u; g_const.qinf[0] = r; g_const.qinf[1] = r*u; g_const.qinf[2] = 0.0f; g_const.qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = g_const.qinf[m]; res[4*n+m] = 0.0f; } } // OP initialisation printf("OP initialisation\n"); op_init(argc,argv,2); g_const_d = op_allocate_constant( &g_const, sizeof( struct global_constants ) ); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"float",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"float",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"float",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"float",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"float",res ,"p_res"); op_decl_const2("gam",1,"float",&g_const.gam ); op_decl_const2("gm1",1,"float",&g_const.gm1 ); 
op_decl_const2("cfl",1,"float",&g_const.cfl ); op_decl_const2("eps",1,"float",&g_const.eps ); op_decl_const2("mach",1,"float",&g_const.mach ); op_decl_const2("alpha",1,"float",&g_const.alpha); op_decl_const2("qinf",4,"float",g_const.qinf ); op_diagnostic_output(); #ifdef DIAGNOSTIC dump_array(p_bound, "initial_dat_p_bound"); dump_array(p_x, "initial_dat_p_x"); dump_array(p_q, "initiall_dat_p_q"); dump_array(p_qold, "initial_dat_p_qold"); dump_array(p_adt, "initial_dat_p_adt"); dump_array(p_res, "initial_dat_res"); #endif // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution // dump_array(p_q, "p_q_iter_before"); // dump_array(p_qold, "p_q_old_iter_before"); op_par_loop_save_soln("save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE)); // dump_array(p_q, "p_q_iter_after"); // dump_array(p_qold, "p_q_old_iter_after"); /* if ( iter == 1 ) { dump_array( p_qold, "p_qold" ); } */ #ifdef DIAGNOSTIC if (iter==1) { dump_array( p_qold, "p_qold" ); } #endif //dump_array( p_qold, "p_qold" ); //op_fetch_data( p_qold ); //print_array( ( float *) p_qold->data, 4*p_qold->set->size, "p_qold" ); // print_array( p_q, "p_qold2" ); // print_array( p_qold, "p_qold" ); //assert( p_q->data[0] != 0.0f ); // predictor/corrector update loop // dump_array(p_adt, "p_adt_before"); for(int k=0; k<2; k++) { // calculate area/timstep if(k == 0 && iter == 0) { printf("Dumping adt before adt_calc execution array"); op_fetch_data( p_adt ); float* array = (float *) p_adt->data; long size = p_adt->set->size; for(long elem = 0; elem < size; ++elem) { printf("%lf",array[elem]); } } op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"float",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE)); 
if(k == 0 && iter == 0) { printf("Dumping adt after 1x adt_calc execution array"); op_fetch_data( p_adt ); float* array = (float *) p_adt->data; long size = p_adt->set->size; for(long elem = 0; elem < size; ++elem) { printf("%lf",array[elem]); } } #ifdef DIAGNOSTIC if (iter==1 && k==0) { dump_array( p_adt, "p_adt0" ); } if (iter==1 && k==1) { dump_array( p_adt, "p_adt1" ); } #endif // dump_array(p_adt, "p_adt_after"); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pecell,4,"float",OP_READ), op_arg_dat(p_q, 1,pecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"float",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pecell,4,"float",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"float",OP_INC )); #ifdef DIAGNOSTIC if (iter==1 && k==0) { dump_array( p_res, "p_res0" ); } if (iter==1 && k==1) { dump_array( p_res, "p_res1" ); } #endif op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"float",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"float",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"float",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"float",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"float",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); #ifdef DIAGNOSTIC if (iter==1 && k==0) { dump_array( p_res, "p_res_a0" ); } if (iter==1 && k==0) { dump_array( p_res, "p_res_a1" ); } #endif // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"float",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ), op_arg_gbl(&rms,1,"float",OP_INC)); } #ifdef DIAGNOSTIC if (iter==1) { dump_array( p_q, "p_q1" ); } #endif // print iteration history rms = sqrt(rms/(float) ncell); if (iter%100 == 0) printf(" %d %10.5e \n",iter,rms); } op_timing_output(); #ifdef DIAGNOSTIC dump_array( p_q, "p_q" 
); #endif }