static int get_solaris_eeprom_parameter(char *parameter, char *outbuffer) {
  int fd = 0, status = 0;
  struct openpromio *openprominfo = NULL;

  fd = open("/dev/openprom", O_RDONLY);
  if (fd == -1) {
    snmp_log(LOG_ERR, "cannot open /dev/openprom\n");
    return 1;
  }

  openprominfo = (struct openpromio *)op_malloc(8192);
  if (!openprominfo) {
    close(fd); /* don't leak the descriptor on allocation failure */
    return 1;
  }

  strcpy(openprominfo->oprom_array, parameter);

  status = ioctl(fd, OPROMGETOPT, openprominfo);
  if (status == -1) {
    snmp_log(LOG_ERR, "cannot read from /dev/openprom\n");
    close(fd);
    op_free(openprominfo);
    return 1;
  }

  strcpy(outbuffer, openprominfo->oprom_array);
  op_free(openprominfo);

  /* close file */
  close(fd);

  return 0;
}
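/*
 * Minimal usage sketch for get_solaris_eeprom_parameter() (not part of the
 * original source). The OpenPROM property name "banner-name" and the buffer
 * size are illustrative assumptions; callers supply whatever property they
 * need and a buffer large enough for the returned string.
 */
static void example_read_banner(void) {
  char banner[8192];
  if (get_solaris_eeprom_parameter("banner-name", banner) == 0)
    snmp_log(LOG_INFO, "banner-name = %s\n", banner);
  else
    snmp_log(LOG_ERR, "failed to read banner-name from /dev/openprom\n");
}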
static void scatter_int_array(int *g_array, int *l_array, int comm_size,
                              int g_size, int l_size, int elem_size) {
  int *sendcnts = (int *)op_malloc(comm_size * sizeof(int));
  int *displs = (int *)op_malloc(comm_size * sizeof(int));
  int disp = 0;

  for (int i = 0; i < comm_size; i++) {
    sendcnts[i] = elem_size * compute_local_size(g_size, comm_size, i);
  }
  for (int i = 0; i < comm_size; i++) {
    displs[i] = disp;
    disp = disp + sendcnts[i];
  }

  MPI_Scatterv(g_array, sendcnts, displs, MPI_INT, l_array, l_size * elem_size,
               MPI_INT, MPI_ROOT, MPI_COMM_WORLD);

  free(sendcnts);
  free(displs);
}
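/*
 * Sketch of the block-partitioning rule that compute_local_size() is assumed
 * to implement here (an assumption, not the original helper): split g_size
 * elements as evenly as possible over comm_size ranks, giving the first
 * (g_size % comm_size) ranks one extra element.
 */
static int compute_local_size_sketch(int global_size, int mpi_comm_size,
                                     int mpi_rank) {
  int local_size = global_size / mpi_comm_size;
  int remainder = global_size % mpi_comm_size;
  if (mpi_rank < remainder)
    local_size = local_size + 1; // ranks 0..remainder-1 take one extra element
  return local_size;
}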
int RepList::add(char *pat1, char *pat2) {
  if (pos >= size || pat1 == NULL || pat2 == NULL)
    return 1;
  replentry *r = (replentry *)op_malloc(sizeof(replentry));
  if (r == NULL)
    return 1;
  r->pattern = mystrrep(pat1, "_", " ");
  r->pattern2 = mystrrep(pat2, "_", " ");
  r->start = false;
  r->end = false;
  dat[pos++] = r;
  // keep the table sorted by pattern (single insertion-sort pass)
  for (int i = pos - 1; i > 0; i--) {
    r = dat[i];
    if (op_strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
      dat[i] = dat[i - 1];
      dat[i - 1] = r;
    } else
      break;
  }
  return 0;
}
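/*
 * Usage sketch (hypothetical, not from the original source): replacement
 * pairs are passed with '_' standing in for a space, which mystrrep()
 * expands before add() inserts the entry in sorted order by pattern.
 */
void replist_usage_example(RepList &rep) {
  rep.add((char *)"alot", (char *)"a_lot"); // stored with pattern2 "a lot"
  rep.add((char *)"foto", (char *)"photo");
}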
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; /**------------------------BEGIN I/O and PARTITIONING -------------------**/ op_timers(&cpu_t1, &wall_t1); /* read in grid from disk on root processor */ FILE *fp; if ( (fp = fopen("new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } int g_nnode,g_ncell,g_nedge,g_nbedge; check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n" ,g_nnode,g_ncell,g_nedge,g_nbedge); if(my_rank == MPI_ROOT) { g_cell = (int *) malloc(4*g_ncell*sizeof(int)); g_edge = (int *) malloc(2*g_nedge*sizeof(int)); g_ecell = (int *) malloc(2*g_nedge*sizeof(int)); g_bedge = (int *) malloc(2*g_nbedge*sizeof(int)); g_becell = (int *) malloc( g_nbedge*sizeof(int)); g_bound = (int *) malloc( g_nbedge*sizeof(int)); g_x = (double *) malloc(2*g_nnode*sizeof(double)); g_q = (double *) malloc(4*g_ncell*sizeof(double)); g_qold = (double *) malloc(4*g_ncell*sizeof(double)); g_res = (double *) malloc(4*g_ncell*sizeof(double)); g_adt = (double *) malloc( g_ncell*sizeof(double)); for (int n=0; n<g_nnode; n++){ check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2); } for (int n=0; n<g_ncell; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1], &g_cell[4*n+2], &g_cell[4*n+3]), 4); } for (int n=0; n<g_nedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1], &g_ecell[2*n],&g_ecell[2*n+1]), 4); } for (int n=0; n<g_nbedge; n++) { check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1], &g_becell[n],&g_bound[n]), 4); } //initialise flow field and residual for (int n=0; n<g_ncell; n++) { for (int m=0; m<4; m++) { g_q[4*n+m] = qinf[m]; g_res[4*n+m] = 0.0f; } } } fclose(fp); nnode = compute_local_size (g_nnode, comm_size, my_rank); ncell = compute_local_size (g_ncell, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); nbedge = compute_local_size (g_nbedge, comm_size, my_rank); op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n" ,my_rank,nnode,ncell,nedge,nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( 
ncell*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode,nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell,ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell,ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell,ncell, 4); scatter_double_array(g_adt, adt, comm_size, g_ncell,ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x ); free(g_q); free(g_qold); free(g_adt); free(g_res); } op_timers(&cpu_t2, &wall_t2); op_printf("Max total file read time = %f\n", wall_t2-wall_t1); /**------------------------END I/O and PARTITIONING -----------------------**/ // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); op_decl_const2("gam",1,"double",&gam); op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", cells, pecell, p_x); //op_partition("PARMETIS", "KWAY", cells, pecell, p_x); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); niter = 1000; for(int iter=1; iter<=niter; iter++) { //save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), 
op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } //print iteration history rms = sqrt(rms/(double) g_ncell); if (iter%100 == 0) op_printf("%d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //output the result dat array to files op_print_dat_to_txtfile(p_q, "out_grid_mpi.dat"); //ASCI op_print_dat_to_binfile(p_q, "out_grid_mpi.bin"); //Binary //write given op_dat's indicated segment of data to a memory block in the order it was originally //arranged (i.e. before partitioning and reordering) double* q_part = (double *)op_malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_idx(p_q, q_part, 0, op_get_size(cells)-1); free(q_part); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
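/*
 * Sketch of the check_scan() helper used around every fscanf() above (an
 * assumption about its behaviour, not the original definition): abort if
 * fscanf() did not convert the expected number of fields.
 */
static void check_scan(int items_read, int items_expected) {
  if (items_read != items_expected) {
    op_printf("error reading from new_grid.dat\n");
    exit(-1);
  }
}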
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_map m_test = op_decl_map_hdf5(cells, nodes,4, file, "m_test"); if (m_test == NULL) printf("m_test not found\n"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_dat p_test = op_decl_dat_hdf5(cells ,4,"double",file,"p_test"); if (p_test == NULL) printf("p_test not found\n"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const2("gam",1,"double",&gam); op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_dump_to_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); //op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate 
area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); //write given op_dat's indicated segment of data to a memory block in the order it was originally //arranged (i.e. before partitioning and reordering) double* q = (double *)op_malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_idx(p_q, q, 0, op_get_size(cells)-1); free(q); //write given op_dat's data to hdf5 file in the order it was originally arranged (i.e. before partitioning and reordering) op_fetch_data_hdf5_file(p_q, "file_name.h5"); //printf("Root process = %d\n",op_is_root()); //output the result dat array to files //op_dump_to_hdf5("new_grid_out.h5"); //writes data as it is held on each process (under MPI) //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
op_plan *op_plan_core(char const *name, op_set set, int part_size, int nargs, op_arg *args, int ninds, int *inds, int staging) { // set exec length int exec_length = set->size; for (int i = 0; i < nargs; i++) { if (args[i].opt && args[i].idx != -1 && args[i].acc != OP_READ) { exec_length += set->exec_size; break; } } /* first look for an existing execution plan */ int ip = 0, match = 0; while (match == 0 && ip < OP_plan_index) { if ((strcmp(name, OP_plans[ip].name) == 0) && (set == OP_plans[ip].set) && (nargs == OP_plans[ip].nargs) && (ninds == OP_plans[ip].ninds) && (part_size == OP_plans[ip].part_size)) { match = 1; for (int m = 0; m < nargs; m++) { if (args[m].dat != NULL && OP_plans[ip].dats[m] != NULL) match = match && (args[m].dat->size == OP_plans[ip].dats[m]->size) && (args[m].dat->dim == OP_plans[ip].dats[m]->dim) && (args[m].map == OP_plans[ip].maps[m]) && (args[m].idx == OP_plans[ip].idxs[m]) && (args[m].acc == OP_plans[ip].accs[m]); else match = match && (args[m].dat == OP_plans[ip].dats[m]) && (args[m].map == OP_plans[ip].maps[m]) && (args[m].idx == OP_plans[ip].idxs[m]) && (args[m].acc == OP_plans[ip].accs[m]); } } ip++; } if (match) { ip--; if (OP_diags > 3) printf(" old execution plan #%d\n", ip); OP_plans[ip].count++; return &(OP_plans[ip]); } else { if (OP_diags > 1) printf(" new execution plan #%d for kernel %s\n", ip, name); } double wall_t1, wall_t2, cpu_t1, cpu_t2; op_timers_core(&cpu_t1, &wall_t1); /* work out worst case shared memory requirement per element */ int halo_exchange = 0; for (int i = 0; i < nargs; i++) { if (args[i].opt && args[i].idx != -1 && args[i].acc != OP_WRITE && args[i].acc != OP_INC) { halo_exchange = 1; break; } } int maxbytes = 0; for (int m = 0; m < nargs; m++) { if (args[m].opt && inds[m] >= 0) { if ((staging == OP_STAGE_INC && args[m].acc == OP_INC) || (staging == OP_STAGE_ALL || staging == OP_STAGE_PERMUTE)) maxbytes += args[m].dat->size; } } /* set blocksize and number of blocks; adaptive size based on 48kB of shared * memory */ int bsize = part_size; // blocksize if (bsize == 0 && maxbytes > 0) bsize = MAX((24 * 1024 / (64 * maxbytes)) * 64, 256); // 48kB exactly is too much, make it 24 else if (bsize == 0 && maxbytes == 0) bsize = 256; // If we do 1 level of coloring, do it in one go if (staging == OP_COLOR2) bsize = exec_length; int nblocks = 0; int indirect_reduce = 0; for (int m = 0; m < nargs; m++) { indirect_reduce |= (args[m].acc != OP_READ && args[m].argtype == OP_ARG_GBL); } indirect_reduce &= (ninds > 0); /* Work out indirection arrays for OP_INCs */ int ninds_staged = 0; // number of distinct (unique dat) indirect incs int *inds_staged = (int *)op_malloc(nargs * sizeof(int)); int *inds_to_inds_staged = (int *)op_malloc(ninds * sizeof(int)); for (int i = 0; i < nargs; i++) inds_staged[i] = -1; for (int i = 0; i < ninds; i++) inds_to_inds_staged[i] = -1; for (int i = 0; i < nargs; i++) { if (inds[i] >= 0 && ((staging == OP_STAGE_INC && args[i].acc == OP_INC) || (staging == OP_STAGE_ALL || staging == OP_STAGE_PERMUTE))) { if (inds_to_inds_staged[inds[i]] == -1) { inds_to_inds_staged[inds[i]] = ninds_staged; inds_staged[i] = ninds_staged; ninds_staged++; } else { inds_staged[i] = inds_to_inds_staged[inds[i]]; } } } int *invinds_staged = (int *)op_malloc(ninds_staged * sizeof(int)); for (int i = 0; i < ninds_staged; i++) invinds_staged[i] = -1; for (int i = 0; i < nargs; i++) if (inds[i] >= 0 && ((staging == OP_STAGE_INC && args[i].acc == OP_INC) || (staging == OP_STAGE_ALL || staging == OP_STAGE_PERMUTE)) && 
invinds_staged[inds_staged[i]] == -1) invinds_staged[inds_staged[i]] = i; int prev_offset = 0; int next_offset = 0; while (next_offset < exec_length) { prev_offset = next_offset; if (prev_offset + bsize >= set->core_size && prev_offset < set->core_size) { next_offset = set->core_size; } else if (prev_offset + bsize >= set->size && prev_offset < set->size && indirect_reduce) { next_offset = set->size; } else if (prev_offset + bsize >= exec_length && prev_offset < exec_length) { next_offset = exec_length; } else { next_offset = prev_offset + bsize; } nblocks++; } // If we do 1 level of coloring, we have a single "block" if (staging == OP_COLOR2) { nblocks = 1; prev_offset = 0; next_offset = exec_length; }; /* enlarge OP_plans array if needed */ if (ip == OP_plan_max) { // printf("allocating more memory for OP_plans %d\n", OP_plan_max); OP_plan_max += 10; OP_plans = (op_plan *)op_realloc(OP_plans, OP_plan_max * sizeof(op_plan)); if (OP_plans == NULL) { printf(" op_plan error -- error reallocating memory for OP_plans\n"); exit(-1); } } /* allocate memory for new execution plan and store input arguments */ OP_plans[ip].dats = (op_dat *)op_malloc(nargs * sizeof(op_dat)); OP_plans[ip].idxs = (int *)op_malloc(nargs * sizeof(int)); OP_plans[ip].optflags = (int *)op_malloc(nargs * sizeof(int)); OP_plans[ip].maps = (op_map *)op_malloc(nargs * sizeof(op_map)); OP_plans[ip].accs = (op_access *)op_malloc(nargs * sizeof(op_access)); OP_plans[ip].inds_staged = (op_access *)op_malloc(ninds_staged * sizeof(op_access)); OP_plans[ip].nthrcol = (int *)op_malloc(nblocks * sizeof(int)); OP_plans[ip].thrcol = (int *)op_malloc(exec_length * sizeof(int)); OP_plans[ip].col_reord = (int *)op_malloc((exec_length + 16) * sizeof(int)); OP_plans[ip].col_offsets = NULL; OP_plans[ip].offset = (int *)op_malloc(nblocks * sizeof(int)); OP_plans[ip].ind_maps = (int **)op_malloc(ninds_staged * sizeof(int *)); OP_plans[ip].ind_offs = (int *)op_malloc(nblocks * ninds_staged * sizeof(int)); OP_plans[ip].ind_sizes = (int *)op_malloc(nblocks * ninds_staged * sizeof(int)); OP_plans[ip].nindirect = (int *)op_calloc(ninds, sizeof(int)); OP_plans[ip].loc_maps = (short **)op_malloc(nargs * sizeof(short *)); OP_plans[ip].nelems = (int *)op_malloc(nblocks * sizeof(int)); OP_plans[ip].ncolblk = (int *)op_calloc(exec_length, sizeof(int)); /* max possibly needed */ OP_plans[ip].blkmap = (int *)op_calloc(nblocks, sizeof(int)); int *offsets = (int *)op_malloc((ninds_staged + 1) * sizeof(int)); offsets[0] = 0; for (int m = 0; m < ninds_staged; m++) { int count = 0; for (int m2 = 0; m2 < nargs; m2++) if (inds_staged[m2] == m) count++; offsets[m + 1] = offsets[m] + count; } OP_plans[ip].ind_map = (int *)op_malloc(offsets[ninds_staged] * exec_length * sizeof(int)); for (int m = 0; m < ninds_staged; m++) { OP_plans[ip].ind_maps[m] = &OP_plans[ip].ind_map[exec_length * offsets[m]]; } free(offsets); int counter = 0; for (int m = 0; m < nargs; m++) { if (inds_staged[m] >= 0) counter++; else OP_plans[ip].loc_maps[m] = NULL; OP_plans[ip].dats[m] = args[m].dat; OP_plans[ip].idxs[m] = args[m].idx; OP_plans[ip].optflags[m] = args[m].opt; OP_plans[ip].maps[m] = args[m].map; OP_plans[ip].accs[m] = args[m].acc; } OP_plans[ip].loc_map = (short *)op_malloc(counter * exec_length * sizeof(short)); counter = 0; for (int m = 0; m < nargs; m++) { if (inds_staged[m] >= 0) { OP_plans[ip].loc_maps[m] = &OP_plans[ip].loc_map[exec_length * (counter)]; counter++; } } OP_plans[ip].name = name; OP_plans[ip].set = set; OP_plans[ip].nargs = nargs; OP_plans[ip].ninds = ninds; 
OP_plans[ip].ninds_staged = ninds_staged; OP_plans[ip].part_size = part_size; OP_plans[ip].nblocks = nblocks; OP_plans[ip].ncolors_core = 0; OP_plans[ip].ncolors_owned = 0; OP_plans[ip].count = 1; OP_plans[ip].inds_staged = inds_staged; OP_plan_index++; /* define aliases */ op_dat *dats = OP_plans[ip].dats; int *idxs = OP_plans[ip].idxs; op_map *maps = OP_plans[ip].maps; op_access *accs = OP_plans[ip].accs; int *offset = OP_plans[ip].offset; int *nelems = OP_plans[ip].nelems; int **ind_maps = OP_plans[ip].ind_maps; int *ind_offs = OP_plans[ip].ind_offs; int *ind_sizes = OP_plans[ip].ind_sizes; int *nindirect = OP_plans[ip].nindirect; /* allocate working arrays */ uint **work; work = (uint **)op_malloc(ninds * sizeof(uint *)); for (int m = 0; m < ninds; m++) { int m2 = 0; while (inds[m2] != m) m2++; if (args[m2].opt == 0) { work[m] = NULL; continue; } int to_size = (maps[m2]->to)->exec_size + (maps[m2]->to)->nonexec_size + (maps[m2]->to)->size; work[m] = (uint *)op_malloc(to_size * sizeof(uint)); } int *work2; work2 = (int *)op_malloc(nargs * bsize * sizeof(int)); /* max possibly needed */ /* process set one block at a time */ float total_colors = 0; prev_offset = 0; next_offset = 0; for (int b = 0; b < nblocks; b++) { prev_offset = next_offset; if (prev_offset + bsize >= set->core_size && prev_offset < set->core_size) { next_offset = set->core_size; } else if (prev_offset + bsize >= set->size && prev_offset < set->size && indirect_reduce) { next_offset = set->size; } else if (prev_offset + bsize >= exec_length && prev_offset < exec_length) { next_offset = exec_length; } else { next_offset = prev_offset + bsize; } if (staging == OP_COLOR2) { prev_offset = 0; next_offset = exec_length; }; int bs = next_offset - prev_offset; offset[b] = prev_offset; /* offset for block */ nelems[b] = bs; /* size of block */ /* loop over indirection sets */ for (int m = 0; m < ninds; m++) { int m2 = 0; while (inds[m2] != m) m2++; int m3 = inds_staged[m2]; if (m3 < 0) continue; if (args[m2].opt == 0) { if (b == 0) { ind_offs[m3 + b * ninds_staged] = 0; ind_sizes[m3 + b * ninds_staged] = 0; } else { ind_offs[m3 + b * ninds_staged] = ind_offs[m3 + (b - 1) * ninds_staged]; ind_sizes[m3 + b * ninds_staged] = 0; } continue; } /* build the list of elements indirectly referenced in this block */ int ne = 0; /* number of elements */ for (int m2 = 0; m2 < nargs; m2++) { if (inds[m2] == m) { for (int e = prev_offset; e < next_offset; e++) work2[ne++] = maps[m2]->map[idxs[m2] + e * maps[m2]->dim]; } } /* sort them, then eliminate duplicates */ qsort(work2, ne, sizeof(int), comp); int nde = 0; int p = 0; while (p < ne) { work2[nde] = work2[p]; while (p < ne && work2[p] == work2[nde]) p++; nde++; } ne = nde; /* number of distinct elements */ /* if (OP_diags > 5) { printf(" indirection set %d: ",m); for (int e=0; e<ne; e++) printf(" %d",work2[e]); printf(" \n"); } */ /* store mapping and renumbered mappings in execution plan */ for (int e = 0; e < ne; e++) { ind_maps[m3][nindirect[m]++] = work2[e]; work[m][work2[e]] = e; // inverse mapping } for (int m2 = 0; m2 < nargs; m2++) { if (inds[m2] == m) { for (int e = prev_offset; e < next_offset; e++) OP_plans[ip].loc_maps[m2][e] = (short)(work[m][maps[m2]->map[idxs[m2] + e * maps[m2]->dim]]); } } if (b == 0) { ind_offs[m3 + b * ninds_staged] = 0; ind_sizes[m3 + b * ninds_staged] = nindirect[m]; } else { ind_offs[m3 + b * ninds_staged] = ind_offs[m3 + (b - 1) * ninds_staged] + ind_sizes[m3 + (b - 1) * ninds_staged]; ind_sizes[m3 + b * ninds_staged] = nindirect[m] - ind_offs[m3 + b 
* ninds_staged]; } } /* now colour main set elements */ for (int e = prev_offset; e < next_offset; e++) OP_plans[ip].thrcol[e] = -1; int repeat = 1; int ncolor = 0; int ncolors = 0; while (repeat) { repeat = 0; for (int m = 0; m < nargs; m++) { if (inds[m] >= 0 && args[m].opt) for (int e = prev_offset; e < next_offset; e++) work[inds[m]][maps[m]->map[idxs[m] + e * maps[m]->dim]] = 0; /* zero out color array */ } for (int e = prev_offset; e < next_offset; e++) { if (OP_plans[ip].thrcol[e] == -1) { int mask = 0; if (staging == OP_COLOR2 && halo_exchange && e >= set->core_size && ncolor == 0) mask = 1; for (int m = 0; m < nargs; m++) if (inds[m] >= 0 && (accs[m] == OP_INC || accs[m] == OP_RW) && args[m].opt) mask |= work[inds[m]] [maps[m]->map[idxs[m] + e * maps[m]->dim]]; /* set bits of mask */ int color = ffs(~mask) - 1; /* find first bit not set */ if (color == -1) { /* run out of colors on this pass */ repeat = 1; } else { OP_plans[ip].thrcol[e] = ncolor + color; mask = 1 << color; ncolors = MAX(ncolors, ncolor + color + 1); for (int m = 0; m < nargs; m++) if (inds[m] >= 0 && (accs[m] == OP_INC || accs[m] == OP_RW) && args[m].opt) work[inds[m]][maps[m]->map[idxs[m] + e * maps[m]->dim]] |= mask; /* set color bit */ } } } ncolor += 32; /* increment base level */ } OP_plans[ip].nthrcol[b] = ncolors; /* number of thread colors in this block */ total_colors += ncolors; // if(ncolors>1) printf(" number of colors in this block = %d \n",ncolors); } /* create element permutation by color */ if (staging == OP_STAGE_PERMUTE || staging == OP_COLOR2) { int size_of_col_offsets = 0; for (int b = 0; b < nblocks; b++) { size_of_col_offsets += OP_plans[ip].nthrcol[b] + 1; } // allocate OP_plans[ip].col_offsets = (int **)op_malloc(nblocks * sizeof(int *)); int *col_offsets = (int *)op_malloc(size_of_col_offsets * sizeof(int *)); size_of_col_offsets = 0; op_keyvalue *kv = (op_keyvalue *)op_malloc(bsize * sizeof(op_keyvalue)); for (int b = 0; b < nblocks; b++) { int ncolor = OP_plans[ip].nthrcol[b]; for (int e = 0; e < nelems[b]; e++) { kv[e].key = OP_plans[ip].thrcol[offset[b] + e]; kv[e].value = e; } qsort(kv, nelems[b], sizeof(op_keyvalue), comp2); OP_plans[ip].col_offsets[b] = col_offsets + size_of_col_offsets; OP_plans[ip].col_offsets[b][0] = 0; size_of_col_offsets += (ncolor + 1); // Set up permutation and pointers to beginning of each color ncolor = 0; for (int e = 0; e < nelems[b]; e++) { OP_plans[ip].thrcol[offset[b] + e] = kv[e].key; OP_plans[ip].col_reord[offset[b] + e] = kv[e].value; if (e > 0) if (kv[e].key > kv[e - 1].key) { ncolor++; OP_plans[ip].col_offsets[b][ncolor] = e; } } OP_plans[ip].col_offsets[b][ncolor + 1] = nelems[b]; } for (int i = exec_length; i < exec_length + 16; i++) OP_plans[ip].col_reord[i] = 0; } /* color the blocks, after initialising colors to 0 */ int *blk_col; blk_col = (int *)op_malloc(nblocks * sizeof(int)); for (int b = 0; b < nblocks; b++) blk_col[b] = -1; int repeat = 1; int ncolor = 0; int ncolors = 0; while (repeat) { repeat = 0; for (int m = 0; m < nargs; m++) { if (inds[m] >= 0 && args[m].opt) { int to_size = (maps[m]->to)->exec_size + (maps[m]->to)->nonexec_size + (maps[m]->to)->size; for (int e = 0; e < to_size; e++) work[inds[m]][e] = 0; // zero out color arrays } } prev_offset = 0; next_offset = 0; for (int b = 0; b < nblocks; b++) { prev_offset = next_offset; if (prev_offset + bsize >= set->core_size && prev_offset < set->core_size) { next_offset = set->core_size; } else if (prev_offset + bsize >= set->size && prev_offset < set->size && indirect_reduce) { 
next_offset = set->size; } else if (prev_offset + bsize >= exec_length && prev_offset < exec_length) { next_offset = exec_length; } else { next_offset = prev_offset + bsize; } if (blk_col[b] == -1) { // color not yet assigned to block uint mask = 0; if (next_offset > set->core_size) { // should not use block colors from // the core set when doing the // non_core ones if (prev_offset <= set->core_size) OP_plans[ip].ncolors_core = ncolors; for (int shifter = 0; shifter < OP_plans[ip].ncolors_core; shifter++) mask |= 1 << shifter; if (prev_offset == set->size && indirect_reduce) OP_plans[ip].ncolors_owned = ncolors; for (int shifter = OP_plans[ip].ncolors_core; indirect_reduce && shifter < OP_plans[ip].ncolors_owned; shifter++) mask |= 1 << shifter; } for (int m = 0; m < nargs; m++) { if (inds[m] >= 0 && (accs[m] == OP_INC || accs[m] == OP_RW) && args[m].opt) for (int e = prev_offset; e < next_offset; e++) mask |= work[inds[m]] [maps[m]->map[idxs[m] + e * maps[m]->dim]]; // set // bits of // mask } int color = ffs(~mask) - 1; // find first bit not set if (color == -1) { // run out of colors on this pass repeat = 1; } else { blk_col[b] = ncolor + color; mask = 1 << color; ncolors = MAX(ncolors, ncolor + color + 1); for (int m = 0; m < nargs; m++) { if (inds[m] >= 0 && (accs[m] == OP_INC || accs[m] == OP_RW) && args[m].opt) for (int e = prev_offset; e < next_offset; e++) work[inds[m]][maps[m]->map[idxs[m] + e * maps[m]->dim]] |= mask; } } } } ncolor += 32; // increment base level } /* store block mapping and number of blocks per color */ if (indirect_reduce && OP_plans[ip].ncolors_owned == 0) OP_plans[ip].ncolors_owned = ncolors; // no MPI, so get the reduction arrays after everyting is done OP_plans[ip].ncolors = ncolors; if (staging == OP_COLOR2) OP_plans[ip].ncolors = OP_plans[ip].nthrcol[0]; /*for(int col = 0; col = OP_plans[ip].ncolors;col++) //should initialize to zero because op_calloc returns garbage!! { OP_plans[ip].ncolblk[col] = 0; }*/ for (int b = 0; b < nblocks; b++) OP_plans[ip].ncolblk[blk_col[b]]++; // number of blocks of each color for (int c = 1; c < ncolors; c++) OP_plans[ip].ncolblk[c] += OP_plans[ip].ncolblk[c - 1]; // cumsum for (int c = 0; c < ncolors; c++) work2[c] = 0; for (int b = 0; b < nblocks; b++) { int c = blk_col[b]; int b2 = work2[c]; // number of preceding blocks of this color if (c > 0) b2 += OP_plans[ip].ncolblk[c - 1]; // plus previous colors OP_plans[ip].blkmap[b2] = b; work2[c]++; // increment counter } for (int c = ncolors - 1; c > 0; c--) OP_plans[ip].ncolblk[c] -= OP_plans[ip].ncolblk[c - 1]; // undo cumsum /* reorder blocks by color? 
*/ /* work out shared memory requirements */ OP_plans[ip].nsharedCol = (int *)op_malloc(ncolors * sizeof(int)); float total_shared = 0; for (int col = 0; col < ncolors; col++) { OP_plans[ip].nsharedCol[col] = 0; for (int b = 0; b < nblocks; b++) { if (blk_col[b] == col) { int nbytes = 0; for (int m = 0; m < ninds_staged; m++) { int m2 = 0; while (inds_staged[m2] != m) m2++; if (args[m2].opt == 0) continue; nbytes += ROUND_UP_64(ind_sizes[m + b * ninds_staged] * dats[m2]->size); } OP_plans[ip].nsharedCol[col] = MAX(OP_plans[ip].nsharedCol[col], nbytes); total_shared += nbytes; } } } OP_plans[ip].nshared = 0; total_shared = 0; for (int b = 0; b < nblocks; b++) { int nbytes = 0; for (int m = 0; m < ninds_staged; m++) { int m2 = 0; while (inds_staged[m2] != m) m2++; if (args[m2].opt == 0) continue; nbytes += ROUND_UP_64(ind_sizes[m + b * ninds_staged] * dats[m2]->size); } OP_plans[ip].nshared = MAX(OP_plans[ip].nshared, nbytes); total_shared += nbytes; } /* work out total bandwidth requirements */ OP_plans[ip].transfer = 0; OP_plans[ip].transfer2 = 0; float transfer3 = 0; if (staging != OP_COLOR2 && staging != OP_STAGE_INC) { for (int b = 0; b < nblocks; b++) { for (int m = 0; m < nargs; m++) // for each argument { if (args[m].opt) { if (inds[m] < 0) // if it is directly addressed { float fac = 2.0f; if (accs[m] == OP_READ || accs[m] == OP_WRITE) // if you only read or write it fac = 1.0f; if (dats[m] != NULL) { OP_plans[ip].transfer += fac * nelems[b] * dats[m]->size; // cost of reading it all OP_plans[ip].transfer2 += fac * nelems[b] * dats[m]->size; transfer3 += fac * nelems[b] * dats[m]->size; } } else // if it is indirectly addressed: cost of reading the pointer // to it { OP_plans[ip].transfer += nelems[b] * sizeof(short); OP_plans[ip].transfer2 += nelems[b] * sizeof(short); transfer3 += nelems[b] * sizeof(short); } } } for (int m = 0; m < ninds; m++) // for each indirect mapping { int m2 = 0; while (inds[m2] != m) // find the first argument that uses this mapping m2++; if (args[m2].opt == 0) continue; float fac = 2.0f; if (accs[m2] == OP_READ || accs[m2] == OP_WRITE) // only read it fac = 1.0f; if (staging == OP_STAGE_INC && accs[m2] != OP_INC) { OP_plans[ip].transfer += 1; OP_plans[ip].transfer2 += 1; continue; } OP_plans[ip].transfer += fac * ind_sizes[m + b * ninds] * dats[m2]->size; // simply read all data one by one /* work out how many cache lines are used by indirect addressing */ int i_map, l_new, l_old; int e0 = ind_offs[m + b * ninds]; // where it starts int e1 = e0 + ind_sizes[m + b * ninds]; // where it ends l_old = -1; for (int e = e0; e < e1; e++) // iterate through every indirectly accessed data element { i_map = ind_maps[m][e]; // the pointer to the data element l_new = (i_map * dats[m2]->size) / OP_cache_line_size; // which cache line it is on (full size, // dim*sizeof(type)) if (l_new > l_old) // if it is on a further cache line (that is not // yet loaded, - i_map is ordered) OP_plans[ip].transfer2 += fac * OP_cache_line_size; // load the cache line l_old = l_new; l_new = ((i_map + 1) * dats[m2]->size - 1) / OP_cache_line_size; // the last byte of the data OP_plans[ip].transfer2 += fac * (l_new - l_old) * OP_cache_line_size; // again, if not loaded, // load it (can be // multiple cache lines) l_old = l_new; } l_old = -1; for (int e = e0; e < e1; e++) { i_map = ind_maps[m][e]; // pointer to the data element l_new = (i_map * dats[m2]->size) / (dats[m2]->dim * OP_cache_line_size); // which cache line the // first dimension of // the data is on if (l_new > l_old) transfer3 
+= fac * dats[m2]->dim * OP_cache_line_size; // if not loaded yet, load all cache lines l_old = l_new; l_new = ((i_map + 1) * dats[m2]->size - 1) / (dats[m2]->dim * OP_cache_line_size); // primitve type's last byte transfer3 += fac * (l_new - l_old) * dats[m2]->dim * OP_cache_line_size; // load it l_old = l_new; } /* also include mappings to load/store data */ fac = 1.0f; if (accs[m2] == OP_RW) fac = 2.0f; OP_plans[ip].transfer += fac * ind_sizes[m + b * ninds] * sizeof(int); OP_plans[ip].transfer2 += fac * ind_sizes[m + b * ninds] * sizeof(int); transfer3 += fac * ind_sizes[m + b * ninds] * sizeof(int); } } } /* print out useful information */ if (OP_diags > 1) { printf(" number of blocks = %d \n", nblocks); printf(" number of block colors = %d \n", OP_plans[ip].ncolors); printf(" maximum block size = %d \n", bsize); printf(" average thread colors = %.2f \n", total_colors / nblocks); printf(" shared memory required = "); for (int i = 0; i < ncolors - 1; i++) printf(" %.2f KB,", OP_plans[ip].nsharedCol[i] / 1024.0f); printf(" %.2f KB\n", OP_plans[ip].nsharedCol[ncolors - 1] / 1024.0f); printf(" average data reuse = %.2f \n", maxbytes * (exec_length / total_shared)); printf(" data transfer (used) = %.2f MB \n", OP_plans[ip].transfer / (1024.0f * 1024.0f)); printf(" data transfer (total) = %.2f MB \n", OP_plans[ip].transfer2 / (1024.0f * 1024.0f)); printf(" SoA/AoS transfer ratio = %.2f \n\n", transfer3 / OP_plans[ip].transfer2); } /* validate plan info */ op_plan_check(OP_plans[ip], ninds_staged, inds_staged); /* free work arrays */ for (int m = 0; m < ninds; m++) free(work[m]); free(work); free(work2); free(blk_col); free(inds_to_inds_staged); free(invinds_staged); op_timers_core(&cpu_t2, &wall_t2); for (int i = 0; i < OP_kern_max; i++) { if (strcmp(name, OP_kernels[i].name) == 0) { OP_kernels[i].plan_time += wall_t2 - wall_t1; break; } } /* return pointer to plan */ OP_plan_time += wall_t2 - wall_t1; return &(OP_plans[ip]); }
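/*
 * Sketches of the qsort() comparators referenced above (assumptions about
 * their behaviour, not the original definitions): comp() orders plain ints
 * ascending and comp2() orders op_keyvalue entries by key, which is what the
 * duplicate elimination and the colour permutation above rely on.
 */
static int comp_sketch(const void *a, const void *b) {
  int x = *(const int *)a;
  int y = *(const int *)b;
  return (x > y) - (x < y);
}

static int comp2_sketch(const void *a, const void *b) {
  const op_keyvalue *ka = (const op_keyvalue *)a;
  const op_keyvalue *kb = (const op_keyvalue *)b;
  return (ka->key > kb->key) - (ka->key < kb->key);
}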
void op_plan_check(op_plan OP_plan, int ninds, int *inds) {
  // compute exec_length - which includes the exec halo given certain
  // conditions (MPI)
  int exec_length = OP_plan.set->size;
  for (int m = 0; m < OP_plan.nargs; m++) {
    if (OP_plan.idxs[m] != -1 &&
        OP_plan.accs[m] != OP_READ) // if it needs exchanging
    {
      exec_length += OP_plan.set->exec_size;
      break;
    }
  }

  int err, ntot;

  int nblock = 0;
  for (int col = 0; col < OP_plan.ncolors; col++) {
    nblock += OP_plan.ncolblk[col];
  }

  /*
   * check total size
   */
  int nelem = 0;
  for (int n = 0; n < nblock; n++)
    nelem += OP_plan.nelems[n];

  if (nelem != exec_length) {
    printf(" *** OP_plan_check: nelems error \n");
  } else if (OP_diags > 6) {
    printf(" *** OP_plan_check: nelems OK \n");
  }

  /*
   * check offset and nelems are consistent
   */
  err = 0;
  ntot = 0;
  for (int n = 0; n < nblock; n++) {
    err += (OP_plan.offset[n] != ntot);
    ntot += OP_plan.nelems[n];
  }

  if (err != 0) {
    printf(" *** OP_plan_check: offset error \n");
  } else if (OP_diags > 6) {
    printf(" *** OP_plan_check: offset OK \n");
  }

  /*
   * check blkmap permutation
   */
  int *blkmap = (int *)op_malloc(nblock * sizeof(int));
  for (int n = 0; n < nblock; n++)
    blkmap[n] = OP_plan.blkmap[n];
  qsort(blkmap, nblock, sizeof(int), comp);

  err = 0;
  for (int n = 0; n < nblock; n++)
    err += (blkmap[n] != n);
  free(blkmap);

  if (err != 0) {
    printf(" *** OP_plan_check: blkmap error \n");
  } else if (OP_diags > 6) {
    printf(" *** OP_plan_check: blkmap OK \n");
  }

  /*
   * check ind_offs and ind_sizes are consistent
   */
  err = 0;
  for (int i = 0; i < ninds; i++) {
    ntot = 0;
    for (int n = 0; n < nblock; n++) {
      err += (OP_plan.ind_offs[i + n * ninds] != ntot);
      ntot += OP_plan.ind_sizes[i + n * ninds];
    }
  }

  if (err != 0) {
    printf(" *** OP_plan_check: ind_offs error \n");
  } else if (OP_diags > 6) {
    printf(" *** OP_plan_check: ind_offs OK \n");
  }

  /*
   * check ind_maps correctly ordered within each block
   * and indices within range
   */
  err = 0;
  for (int m = 0; m < ninds; m++) {
    int m2 = 0;
    while (inds[m2] != m)
      m2++;
    if (OP_plan.maps[m2] == NULL)
      continue; // it is a deactivated optional argument
    int halo_size = (OP_plan.maps[m2]->to)->exec_size +
                    (OP_plan.maps[m2]->to)->nonexec_size;
    int set_size = OP_plan.maps[m2]->to->size + halo_size;

    ntot = 0;
    for (int n = 0; n < nblock; n++) {
      int last = -1;
      for (int e = ntot; e < ntot + OP_plan.ind_sizes[m + n * ninds]; e++) {
        err += (OP_plan.ind_maps[m][e] <= last);
        last = OP_plan.ind_maps[m][e];
      }
      err += (last >= set_size);
      ntot += OP_plan.ind_sizes[m + n * ninds];
    }
  }

  if (err != 0) {
    printf(" *** OP_plan_check: ind_maps error \n");
  } else if (OP_diags > 6) {
    printf(" *** OP_plan_check: ind_maps OK \n");
  }

  /*
   * check maps (most likely source of errors)
   */
  err = 0;
  for (int m = 0; m < OP_plan.nargs; m++) {
    if (OP_plan.maps[m] != NULL && OP_plan.optflags[m] &&
        OP_plan.loc_maps[m] != NULL) {
      op_map map = OP_plan.maps[m];
      int m2 = inds[m];

      ntot = 0;
      for (int n = 0; n < nblock; n++) {
        for (int e = ntot; e < ntot + OP_plan.nelems[n]; e++) {
          int p_local = OP_plan.loc_maps[m][e];
          int p_global =
              OP_plan.ind_maps[m2][p_local + OP_plan.ind_offs[m2 + n * ninds]];
          err += (p_global != map->map[OP_plan.idxs[m] + e * map->dim]);
        }
        ntot += OP_plan.nelems[n];
      }
    }
  }

  if (err != 0) {
    printf(" *** OP_plan_check: %d maps error(s) \n", err);
  } else if (OP_diags > 6) {
    printf(" *** OP_plan_check: maps OK \n");
  }

  /*
   * check thread and block coloring
   */
  return;
}
void* OpMemGroup::NewGRO(size_t size)
{
  // This allocator will not accept allocations of zero byte sizes
  OP_ASSERT(size > 0);

  // This is assumed below
  OP_ASSERT(MEMORY_GROUP_SIZE > MEMORY_GROUP_UNUSABLE_SIZE);

  //
  // Round up to the nearest MEMORY_ALIGNMENT boundary to assure correct
  // alignment of objects.
  //
  size_t mask = (~(size_t)0) - (MEMORY_ALIGNMENT - 1);
  size = (size + MEMORY_ALIGNMENT - 1) & mask;

  if ( primary_size >= size )
  {
    // Satisfy request from primary memory area
    void* ptr = (void*)primary_ptr;
    primary_ptr += size;
    primary_size -= size;
    return ptr;
  }

  if ( secondary_size >= size )
  {
    // Satisfy request from secondary memory area
    void* ptr = (void*)secondary_ptr;
    secondary_ptr += size;
    secondary_size -= size;
    return ptr;
  }

  //
  // Neither the primary nor the secondary memory area was large enough to
  // satisfy the request, so we have to allocate a chunk of memory from
  // somewhere else first.
  //
  // Since we need to allocate something, determine the size of the
  // header used for chaining the allocations, since this will be needed
  // later:
  //
  const int header_size =
      (sizeof(void*) > MEMORY_ALIGNMENT) ? sizeof(void*) : MEMORY_ALIGNMENT;

  //
  // Maybe the request failed because the size was very large? If
  // so, create a "private" allocation for the request. The limit is
  // set at 1/6th of the memory group size (chunk size), which is
  // really just an arbitrary number:
  //
  if ( size >= (MEMORY_GROUP_SIZE/6) )
  {
    void* ptr = op_malloc(size + header_size);
    if ( ptr == 0 )
      return 0;

    // Add this allocation to the chain of all allocations
    *(void**)ptr = all;
    all = ptr;

    return (void*)(((char*)ptr) + header_size);
  }

  //
  // The request was not large, so the two areas are either full, or
  // they have not been created yet, so create a new one:
  //
  void* fresh_area = op_malloc(MEMORY_GROUP_SIZE);
  if ( fresh_area == 0 )
    return 0;

  // Add this allocation to the chain of all allocations
  *(void**)fresh_area = all;
  all = fresh_area;

  //
  // Make sure the primary area is the one with the most bytes
  // free (since this is the one to be kept, and is the one to
  // be filled up first).
  //
  if ( primary_size < secondary_size )
  {
    // Swap primary and secondary
    char* tmp_ptr = primary_ptr;
    int tmp_size = primary_size;
    primary_ptr = secondary_ptr;
    primary_size = secondary_size;
    secondary_ptr = tmp_ptr;
    secondary_size = tmp_size;
  }

  void* ptr;

  if ( primary_size < MEMORY_GROUP_UNUSABLE_SIZE )
  {
    //
    // The primary, which is the one with the most free bytes, has
    // fewer free bytes than the threshold for what is considered
    // useful. Keep it for later nonetheless just in case, but
    // only as the secondary, to speed up allocations from a new
    // fresh primary.
    //
    // This strategy will gain us something when allocations are
    // generally small in size. Profiling may reveal that this
    // optimization is not necessary.
    //
    secondary_ptr = primary_ptr;
    secondary_size = primary_size;

    // Give the fresh chunk of memory to the primary:
    primary_ptr = ((char*)fresh_area) + header_size;
    primary_size = MEMORY_GROUP_SIZE - header_size;

    // Satisfy the allocation from the fresh primary (this will
    // always work; the size is small)
    ptr = (void*)primary_ptr;
    primary_ptr += size;
    primary_size -= size;
  }
  else
  {
    //
    // The primary still has some useful free bytes, so keep it as
    // primary to increase its chance of contributing when a small
    // allocation comes along.
    //
    secondary_ptr = ((char*)fresh_area) + header_size;
    secondary_size = MEMORY_GROUP_SIZE - header_size;

    // Satisfy the allocation from the fresh secondary memory area
    // (this will always work; the size is small)
    ptr = (void*)secondary_ptr;
    secondary_ptr += size;
    secondary_size -= size;
  }

  return ptr;
}
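/*
 * Sketch (an assumption, not the original source) of how the 'all' chain
 * built by NewGRO() can be torn down: each chunk stores the previously
 * allocated chunk's address in its first pointer-sized slot, so releasing
 * the whole group is a walk down that singly linked list. The method name
 * ReleaseAllGRO() is hypothetical.
 */
void OpMemGroup::ReleaseAllGRO(void)
{
  void* ptr = all;
  while ( ptr != 0 )
  {
    void* next = *(void**)ptr; // link written by NewGRO()
    op_free(ptr);
    ptr = next;
  }
  all = 0;
  primary_ptr = secondary_ptr = 0;
  primary_size = secondary_size = 0;
}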
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode,ncell,nedge,nbedge,niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // read in grid op_printf("reading in grid \n"); FILE *fp; if ( (fp = fopen("./new_grid.dat","r")) == NULL) { op_printf("can't open file new_grid.dat\n"); exit(-1); } if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } cell = (int *) malloc(4*ncell*sizeof(int)); edge = (int *) malloc(2*nedge*sizeof(int)); ecell = (int *) malloc(2*nedge*sizeof(int)); bedge = (int *) malloc(2*nbedge*sizeof(int)); becell = (int *) malloc( nbedge*sizeof(int)); bound = (int *) malloc( nbedge*sizeof(int)); x = (double *) malloc(2*nnode*sizeof(double)); q = (double *) malloc(4*ncell*sizeof(double)); qold = (double *) malloc(4*ncell*sizeof(double)); res = (double *) malloc(4*ncell*sizeof(double)); adt = (double *) malloc( ncell*sizeof(double)); for (int n=0; n<nnode; n++) { if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<ncell; n++) { if (fscanf(fp,"%d %d %d %d \n",&cell[4*n ], &cell[4*n+1], &cell[4*n+2], &cell[4*n+3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1], &ecell[2*n],&ecell[2*n+1]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n=0; n<nbedge; n++) { if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1], &becell[n], &bound[n]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } fclose(fp); // set constants and initialise flow field and residual op_printf("initialising flow field \n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f*atan(1.0f)/45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam*p/r)*mach; double e = p/(r*gm1) + 0.5f*u*u; qinf[0] = r; qinf[1] = r*u; qinf[2] = 0.0f; qinf[3] = r*e; for (int n=0; n<ncell; n++) { for (int m=0; m<4; m++) { q[4*n+m] = qinf[m]; res[4*n+m] = 0.0f; } } // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge"); op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell"); op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge"); op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell"); op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell"); op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound"); op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x"); op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q"); op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold"); op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt"); op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { 
// save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double) op_get_size(cells)); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); if (iter%1000 == 0 && ncell == 720000){ //defailt mesh -- for validation testing //op_printf(" %d %3.16lf \n",iter,rms); float diff=fabs((100.0*(rms/0.0001060114637578))-100.0); op_printf("\n\nTest problem with %d cells is within %3.15E %% of the expected solution\n",720000, diff); if(diff < 0.00001) { op_printf("This test is considered PASSED\n"); } else { op_printf("This test is considered FAILED\n"); } } } op_timers(&cpu_t2, &wall_t2); //output the result dat array to files op_print_dat_to_txtfile(p_q, "out_grid_seq.dat"); //ASCI op_print_dat_to_binfile(p_q, "out_grid_seq.bin"); //Binary //write given op_dat's indicated segment of data to a memory block in the order it was originally //arranged (i.e. before partitioning and reordering) double* q_part = (double *)op_malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_idx(p_q, q_part, 0, op_get_size(cells)-1); free(q_part); op_timing_output(); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); free(cell); free(edge); free(ecell); free(bedge); free(becell); free(bound); free(x); free(q); free(qold); free(res); free(adt); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); // MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); int *becell, *ecell, *bound, *bedge, *edge, *cell; double *x, *q, *qold, *adt, *res; int nnode, ncell, nedge, nbedge; // set constants op_printf("initialising flow field\n"); gam = 1.4f; gm1 = gam - 1.0f; cfl = 0.9f; eps = 0.05f; double mach = 0.4f; double alpha = 3.0f * atan(1.0f) / 45.0f; double p = 1.0f; double r = 1.0f; double u = sqrt(gam * p / r) * mach; double e = p / (r * gm1) + 0.5f * u * u; qinf[0] = r; qinf[1] = r * u; qinf[2] = 0.0f; qinf[3] = r * e; /**------------------------BEGIN I/O -------------------**/ char file[] = "new_grid.dat"; char file_out[] = "new_grid_out.h5"; /* read in grid from disk on root processor */ FILE *fp; if ((fp = fopen(file, "r")) == NULL) { op_printf("can't open file %s\n", file); exit(-1); } int g_nnode, g_ncell, g_nedge, g_nbedge; check_scan( fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4); int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0; double *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0; op_printf("reading in grid \n"); op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n", g_nnode, g_ncell, g_nedge, g_nbedge); if (my_rank == MPI_ROOT) { g_cell = (int *)op_malloc(4 * g_ncell * sizeof(int)); g_edge = (int *)op_malloc(2 * g_nedge * sizeof(int)); g_ecell = (int *)op_malloc(2 * g_nedge * sizeof(int)); g_bedge = (int *)op_malloc(2 * g_nbedge * sizeof(int)); g_becell = (int *)op_malloc(g_nbedge * sizeof(int)); g_bound = (int *)op_malloc(g_nbedge * sizeof(int)); g_x = (double *)op_malloc(2 * g_nnode * sizeof(double)); g_q = (double *)op_malloc(4 * g_ncell * sizeof(double)); g_qold = (double *)op_malloc(4 * g_ncell * sizeof(double)); g_res = (double *)op_malloc(4 * g_ncell * sizeof(double)); g_adt = (double *)op_malloc(g_ncell * sizeof(double)); for (int n = 0; n < g_nnode; n++) { check_scan(fscanf(fp, "%lf %lf \n", &g_x[2 * n], &g_x[2 * n + 1]), 2); } for (int n = 0; n < g_ncell; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n], &g_cell[4 * n + 1], &g_cell[4 * n + 2], &g_cell[4 * n + 3]), 4); } for (int n = 0; n < g_nedge; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2 * n], &g_edge[2 * n + 1], &g_ecell[2 * n], &g_ecell[2 * n + 1]), 4); } for (int n = 0; n < g_nbedge; n++) { check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2 * n], &g_bedge[2 * n + 1], &g_becell[n], &g_bound[n]), 4); } // initialise flow field and residual for (int n = 0; n < g_ncell; n++) { for (int m = 0; m < 4; m++) { g_q[4 * n + m] = qinf[m]; g_res[4 * n + m] = 0.0f; } } } fclose(fp); nnode = compute_local_size(g_nnode, comm_size, my_rank); ncell = compute_local_size(g_ncell, comm_size, my_rank); nedge = compute_local_size(g_nedge, comm_size, my_rank); nbedge = compute_local_size(g_nbedge, comm_size, my_rank); op_printf( "Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n", my_rank, nnode, ncell, nedge, nbedge); /*Allocate memory to hold local sets, mapping tables and data*/ cell = (int *)op_malloc(4 * ncell * sizeof(int)); edge = (int *)op_malloc(2 * nedge * sizeof(int)); ecell = (int *)op_malloc(2 * nedge * sizeof(int)); bedge = (int *)op_malloc(2 * nbedge * sizeof(int)); becell = (int *)op_malloc(nbedge * sizeof(int)); bound = (int *)op_malloc(nbedge * sizeof(int)); x = (double *)op_malloc(2 * nnode * sizeof(double)); q = (double 
*)op_malloc(4 * ncell * sizeof(double)); qold = (double *)op_malloc(4 * ncell * sizeof(double)); res = (double *)op_malloc(4 * ncell * sizeof(double)); adt = (double *)op_malloc(ncell * sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4); scatter_int_array(g_edge, edge, comm_size, g_nedge, nedge, 2); scatter_int_array(g_ecell, ecell, comm_size, g_nedge, nedge, 2); scatter_int_array(g_bedge, bedge, comm_size, g_nbedge, nbedge, 2); scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1); scatter_int_array(g_bound, bound, comm_size, g_nbedge, nbedge, 1); scatter_double_array(g_x, x, comm_size, g_nnode, nnode, 2); scatter_double_array(g_q, q, comm_size, g_ncell, ncell, 4); scatter_double_array(g_qold, qold, comm_size, g_ncell, ncell, 4); scatter_double_array(g_res, res, comm_size, g_ncell, ncell, 4); scatter_double_array(g_adt, adt, comm_size, g_ncell, ncell, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if (my_rank == MPI_ROOT) { free(g_cell); free(g_edge); free(g_ecell); free(g_bedge); free(g_becell); free(g_bound); free(g_x); free(g_q); free(g_qold); free(g_adt); free(g_res); } /**------------------------END I/O -----------------------**/ /* FIXME: It's not clear to the compiler that sth. is going on behind the scenes here. Hence theses variables are reported as unused */ op_set nodes = op_decl_set(nnode, "nodes"); op_set edges = op_decl_set(nedge, "edges"); op_set bedges = op_decl_set(nbedge, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pedge = op_decl_map(edges, nodes, 2, edge, "pedge"); op_map pecell = op_decl_map(edges, cells, 2, ecell, "pecell"); op_map pbedge = op_decl_map(bedges, nodes, 2, bedge, "pbedge"); op_map pbecell = op_decl_map(bedges, cells, 1, becell, "pbecell"); op_map pcell = op_decl_map(cells, nodes, 4, cell, "pcell"); op_dat p_bound = op_decl_dat(bedges, 1, "int", bound, "p_bound"); op_dat p_x = op_decl_dat(nodes, 2, "double", x, "p_x"); op_dat p_q = op_decl_dat(cells, 4, "double", q, "p_q"); op_dat p_qold = op_decl_dat(cells, 4, "double", qold, "p_qold"); op_dat p_adt = op_decl_dat(cells, 1, "double", adt, "p_adt"); op_dat p_res = op_decl_dat(cells, 4, "double", res, "p_res"); /* Test out creating dataset within a nested path in an HDF5 file -- Remove when needing to create correct Airfoil mesh */ op_dat p_x_test = op_decl_dat(nodes, 2, "double", x, "/group3/group2/group1/p_x_test"); op_map pedge_test = op_decl_map(edges, nodes, 2, edge, "/group3/pedge_test"); op_decl_const(1, "double", &gam); op_decl_const(1, "double", &gm1); op_decl_const(1, "double", &cfl); op_decl_const(1, "double", &eps); op_decl_const(1, "double", &mach); op_decl_const(1, "double", &alpha); op_decl_const(4, "double", qinf); op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); /* Test functionality of fetching data of an op_dat to an HDF5 file*/ op_fetch_data_hdf5_file(p_x_test, "test.h5"); char name[128]; int time = 0; sprintf(name, "states_%07i", (int)(time * 100.0)); op_fetch_data_hdf5_file_path(p_x_test, "test.h5", name); sprintf(name, "/results/states_%07i", (int)(time * 100.0)); op_fetch_data_hdf5_file_path(p_x_test, "test.h5", name); /* Test functionality of dumping all the sets,maps and dats to an HDF5 file*/ op_dump_to_hdf5(file_out); op_write_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl", 1, "double", (char *)&cfl, 
"new_grid_out.h5"); op_write_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf", 4, "double", (char *)qinf, "new_grid_out.h5"); // create halos - for sanity check op_halo_create(); op_exit(); }
void* WindowsOpThreadTools::Allocate(size_t size) { return op_malloc(size); }
RepList::RepList(int n) {
  dat = (replentry **)op_malloc(sizeof(replentry *) * n);
  if (dat == 0)
    size = 0;
  else
    size = n;
  pos = 0;
}
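/*
 * Sketch of the matching cleanup (an assumption; the real destructor may
 * differ): each entry owns the two strings produced by mystrrep() in add(),
 * and 'dat' owns the entry pointers.
 */
RepList::~RepList() {
  for (int i = 0; i < pos; i++) {
    free(dat[i]->pattern);
    free(dat[i]->pattern2);
    free(dat[i]);
  }
  free(dat);
}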