/*
 * Host-side launcher for the OpenCL kernel "save_soln_kernel".
 *
 * Binds the five kernel arguments (the data_d handles of opDat1 and opDat2,
 * the shared-memory offset, set->size and a local buffer of
 * dynamicSharedMemorySize bytes), enqueues a one-dimensional NDRange on
 * cqCommandQueue, waits for the queue to finish and then updates the timing
 * record OP_kernels[0].
 *
 * userSubroutine  stored as OP_kernels[0].name
 * set             its size is passed to the kernel and used for the
 *                 transfer estimate
 * opDat1, opDat2  arguments whose device buffers are bound to the kernel
 *
 * Every OpenCL status code is checked with assert_m.  The event created by
 * the enqueue is released before returning so repeated calls do not leak
 * event objects.
 */
void save_soln_host(const char *userSubroutine,op_set set,op_arg opDat1,op_arg opDat2)
{
  size_t blocksPerGrid;
  size_t threadsPerBlock;
  size_t totalThreadNumber;
  size_t dynamicSharedMemorySize;
  cl_int errorCode;
  cl_event event;
  cl_kernel kernelPointer;
  int sharedMemoryOffset;
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  op_timers(&cpu_t1, &wall_t1);

  // launch configuration: fixed number of work-groups, per-kernel group size
  blocksPerGrid = 200;
  threadsPerBlock = threadsPerBlockSize_save_soln;
  totalThreadNumber = threadsPerBlock * blocksPerGrid;

  // per-thread scratch size: the larger of the two 4-float entries
  // (presumably one entry per data argument -- both are identical here)
  dynamicSharedMemorySize = 0;
  dynamicSharedMemorySize = MAX(dynamicSharedMemorySize,sizeof(float ) * 4);
  dynamicSharedMemorySize = MAX(dynamicSharedMemorySize,sizeof(float ) * 4);
  sharedMemoryOffset = dynamicSharedMemorySize * OP_WARPSIZE;
  dynamicSharedMemorySize = dynamicSharedMemorySize * threadsPerBlock;

  // bind kernel arguments; status codes are OR-ed and checked once
  kernelPointer = getKernel("save_soln_kernel");
  errorCode = clSetKernelArg(kernelPointer,0,sizeof(cl_mem ),&opDat1.data_d);
  errorCode = errorCode | clSetKernelArg(kernelPointer,1,sizeof(cl_mem ),&opDat2.data_d);
  errorCode = errorCode | clSetKernelArg(kernelPointer,2,sizeof(int ),&sharedMemoryOffset);
  errorCode = errorCode | clSetKernelArg(kernelPointer,3,sizeof(int ),&set->size);
  // size with a NULL value: the argument is a buffer sized by the runtime
  errorCode = errorCode | clSetKernelArg(kernelPointer,4,dynamicSharedMemorySize,NULL);
  assert_m(errorCode == CL_SUCCESS,"Error setting OpenCL kernel arguments save_soln");

  // launch and wait for completion
  errorCode = clEnqueueNDRangeKernel(cqCommandQueue,kernelPointer,1,NULL,&totalThreadNumber,&threadsPerBlock,0,NULL,&event);
  assert_m(errorCode == CL_SUCCESS,"Error executing OpenCL kernel save_soln");
  errorCode = clFinish(cqCommandQueue);
  assert_m(errorCode == CL_SUCCESS,"Error completing device command queue");

#ifdef PROFILE
  // Profile the event produced by the enqueue above.  The timestamps are
  // cl_ulong values, so the receiving variables must have exactly that type.
  cl_ulong tqueue, tsubmit, tstart, tend;
  errorCode  = clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_QUEUED, sizeof(tqueue), &tqueue, NULL );
  errorCode |= clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_SUBMIT, sizeof(tsubmit), &tsubmit, NULL );
  errorCode |= clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_START, sizeof(tstart), &tstart, NULL );
  errorCode |= clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_END, sizeof(tend), &tend, NULL );
  assert_m( errorCode == CL_SUCCESS, "error getting profiling info" );
  OP_kernels[0].queue_time      += (tsubmit - tqueue);
  OP_kernels[0].wait_time       += (tstart - tsubmit);
  OP_kernels[0].execution_time  += (tend - tstart);
#endif

  // the event is no longer needed; drop the reference the enqueue gave us
  errorCode = clReleaseEvent(event);
  assert_m(errorCode == CL_SUCCESS,"Error releasing OpenCL event save_soln");

  // update kernel record

  op_timers(&cpu_t2, &wall_t2);
  op_timing_realloc(0);
  OP_kernels[0].name      = userSubroutine;
  OP_kernels[0].count    += 1;
  OP_kernels[0].time     += wall_t2 - wall_t1;
  OP_kernels[0].transfer += (float)set->size * opDat1.size;
  OP_kernels[0].transfer += (float)set->size * opDat2.size;
}
示例#2
0
/*
 * Direct (no indirection) parallel loop for the save_soln kernel.
 *
 * The index range [0, set->size) is cut into one contiguous slice per
 * thread and op_x86_save_soln is called on each slice with the data
 * pointers of arg0 and arg1 viewed as float arrays.  Afterwards the timing
 * record OP_kernels[0] is updated with the name, call count, elapsed wall
 * time and a transfer estimate.
 *
 * name  stored as OP_kernels[0].name
 * set   set being iterated over
 * arg0  first kernel argument
 * arg1  second kernel argument
 */
void op_par_loop_save_soln(char const *name, op_set set,
                           op_arg arg0,
                           op_arg arg1 ) {

    double cpu_begin, cpu_end, wall_begin, wall_end;
    int nthreads;

    if (OP_diags>2) {
        printf(" kernel routine w/o indirection:  save_soln \n");
    }

    // start the clock
    op_timers(&cpu_begin, &wall_begin);

    // one slice per OpenMP thread, or a single slice without OpenMP
#ifdef _OPENMP
    nthreads = omp_get_max_threads( );
#else
    nthreads = 1;
#endif

    // run the kernel on every slice
    #pragma omp parallel for
    for (int slice=0; slice<nthreads; slice++) {
        op_x86_save_soln( (float *) arg0.data,
                          (float *) arg1.data,
                          (set->size* slice   )/nthreads,
                          (set->size*(slice+1))/nthreads );
    }

    // stop the clock and record the call
    op_timers(&cpu_end, &wall_end);
    op_timing_realloc(0);

    float set_elems = (float)set->size;

    OP_kernels[0].name      = name;
    OP_kernels[0].count    += 1;
    OP_kernels[0].time     += wall_end - wall_begin;
    OP_kernels[0].transfer += set_elems * arg0.size;
    OP_kernels[0].transfer += set_elems * arg1.size;
}
示例#3
0
/*
 * Airfoil driver that takes its sets, maps, datasets and constants from the
 * HDF5 file "new_grid.h5".
 *
 * Steps: declare everything from the file, write it back out to
 * "new_grid_out.h5" for comparison, partition, run 1000 time-march
 * iterations (two predictor/corrector stages each), then fetch the solution
 * and print timing information.
 */
int main(int argc, char **argv)
{
  double cpu_start, cpu_end, wall_start, wall_end;
  double rms;
  int    n_iter = 1000;
  char   grid_file[] = "new_grid.h5";

  // bring up OP
  op_init(argc,argv,2);

  op_printf("initialising flow field \n");

  // sets
  op_set nodes  = op_decl_set_hdf5(grid_file, "nodes");
  op_set edges  = op_decl_set_hdf5(grid_file,  "edges");
  op_set bedges = op_decl_set_hdf5(grid_file, "bedges");
  op_set cells  = op_decl_set_hdf5(grid_file,  "cells");

  // mappings between sets
  op_map pedge   = op_decl_map_hdf5(edges, nodes, 2, grid_file, "pedge");
  op_map pecell  = op_decl_map_hdf5(edges, cells,2, grid_file, "pecell");
  op_map pbedge  = op_decl_map_hdf5(bedges,nodes,2, grid_file, "pbedge");
  op_map pbecell = op_decl_map_hdf5(bedges,cells,1, grid_file, "pbecell");
  op_map pcell   = op_decl_map_hdf5(cells, nodes,4, grid_file, "pcell");

  // lookup of a map named "m_test"; a NULL result is only reported
  op_map m_test  = op_decl_map_hdf5(cells, nodes,4, grid_file, "m_test");
  if (!m_test) {
    printf("m_test not found\n");
  }

  // datasets
  op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int"  ,grid_file,"p_bound");
  op_dat p_x     = op_decl_dat_hdf5(nodes ,2,"double",grid_file,"p_x");
  op_dat p_q     = op_decl_dat_hdf5(cells ,4,"double",grid_file,"p_q");
  op_dat p_qold  = op_decl_dat_hdf5(cells ,4,"double",grid_file,"p_qold");
  op_dat p_adt   = op_decl_dat_hdf5(cells ,1,"double",grid_file,"p_adt");
  op_dat p_res   = op_decl_dat_hdf5(cells ,4,"double",grid_file,"p_res");

  // lookup of a dataset named "p_test"; a NULL result is only reported
  op_dat p_test  = op_decl_dat_hdf5(cells ,4,"double",grid_file,"p_test");
  if (!p_test) {
    printf("p_test not found\n");
  }

  // read the global constants from the file ...
  op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5");
  op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5");
  op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5");
  op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5");
  op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5");
  op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5");
  op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5");

  // ... and declare them to OP
  op_decl_const2("gam",1,"double",&gam);
  op_decl_const2("gm1",1,"double",&gm1);
  op_decl_const2("cfl",1,"double",&cfl);
  op_decl_const2("eps",1,"double",&eps);
  op_decl_const2("mach",1,"double",&mach);
  op_decl_const2("alpha",1,"double",&alpha);
  op_decl_const2("qinf",4,"double",qinf);

  op_diagnostic_output();

  // Round-trip check: dump what was read so that an h5diff between
  // new_grid_out.h5 and new_grid.h5 shows whether the input was read
  // correctly.
  op_dump_to_hdf5("new_grid_out.h5");

  op_write_const_hdf5("gam",1,"double",(char *)&gam,  "new_grid_out.h5");
  op_write_const_hdf5("gm1",1,"double",(char *)&gm1,  "new_grid_out.h5");
  op_write_const_hdf5("cfl",1,"double",(char *)&cfl,  "new_grid_out.h5");
  op_write_const_hdf5("eps",1,"double",(char *)&eps,  "new_grid_out.h5");
  op_write_const_hdf5("mach",1,"double",(char *)&mach,  "new_grid_out.h5");
  op_write_const_hdf5("alpha",1,"double",(char *)&alpha,  "new_grid_out.h5");
  op_write_const_hdf5("qinf",4,"double",(char *)qinf,  "new_grid_out.h5");

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x);

  int total_cells = op_get_size(cells);

  // start of the timed region
  op_timers(&cpu_start, &wall_start);

  // time-marching loop
  for (int it = 1; it <= n_iter; ++it) {

    // keep a copy of the current solution
    op_par_loop_save_soln("save_soln",cells,
                op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE));

    // predictor/corrector stages
    for (int stage = 0; stage < 2; ++stage) {

      // area/timestep
      op_par_loop_adt_calc("adt_calc",cells,
                  op_arg_dat(p_x,0,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,2,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,3,pcell,2,"double",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                  op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE));

      // flux residual over interior edges
      op_par_loop_res_calc("res_calc",edges,
                  op_arg_dat(p_x,0,pedge,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pedge,2,"double",OP_READ),
                  op_arg_dat(p_q,0,pecell,4,"double",OP_READ),
                  op_arg_dat(p_q,1,pecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,0,pecell,1,"double",OP_READ),
                  op_arg_dat(p_adt,1,pecell,1,"double",OP_READ),
                  op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
                  op_arg_dat(p_res,1,pecell,4,"double",OP_INC));

      // flux residual over boundary edges
      op_par_loop_bres_calc("bres_calc",bedges,
                  op_arg_dat(p_x,0,pbedge,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pbedge,2,"double",OP_READ),
                  op_arg_dat(p_q,0,pbecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ),
                  op_arg_dat(p_res,0,pbecell,4,"double",OP_INC),
                  op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ));

      // flow-field update; rms is reset first and accumulated by the loop
      rms = 0.0;

      op_par_loop_update("update",cells,
                  op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE),
                  op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
                  op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ),
                  op_arg_gbl(&rms,1,"double",OP_INC));
    }

    // iteration history, printed every 100 iterations
    rms = sqrt(rms/(double)total_cells);

    if ((it % 100) == 0) {
      op_printf(" %d  %10.5e \n",it,rms);
    }
  }

  // end of the timed region
  op_timers(&cpu_end, &wall_end);

  // Copy the given index range of p_q into a memory block, in the order the
  // data was originally arranged (i.e. before partitioning and reordering).
  double *q_host = (double *)op_malloc(sizeof(double)*op_get_size(cells)*4);
  int last_index = op_get_size(cells)-1;
  op_fetch_data_idx(p_q, q_host, 0, last_index);
  free(q_host);

  // Write p_q to an HDF5 file, again in the original ordering.
  op_fetch_data_hdf5_file(p_q, "file_name.h5");

  op_timing_output();
  op_printf("Max total runtime = %f\n",wall_end-wall_start);
  op_exit();
}
示例#4
0
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  double  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  double  rms;

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // read in grid

  op_printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("./new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) {
    op_printf("error reading from new_grid.dat\n"); exit(-1);
  }

  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (double *) malloc(2*nnode*sizeof(double));
  q      = (double *) malloc(4*ncell*sizeof(double));
  qold   = (double *) malloc(4*ncell*sizeof(double));
  res    = (double *) malloc(4*ncell*sizeof(double));
  adt    = (double *) malloc(  ncell*sizeof(double));

  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
                                   &cell[4*n+2], &cell[4*n+3]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
                                   &ecell[2*n],&ecell[2*n+1]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
                                   &becell[n], &bound[n]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
        q[4*n+m] = qinf[m];
      res[4*n+m] = 0.0f;
    }
  }

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"double",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"double",q    ,"p_q");
  //op_dat p_qold  = op_decl_dat(cells ,4,"double",qold ,"p_qold");
  //op_dat p_adt   = op_decl_dat(cells ,1,"double",adt  ,"p_adt");
  //op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  // p_res, p_adt and p_qold  now declared as a temp op_dats during
  // the execution of the time-marching loop

  op_decl_const2("gam",1,"double",&gam);
  op_decl_const2("gm1",1,"double",&gm1);
  op_decl_const2("cfl",1,"double",&cfl);
  op_decl_const2("eps",1,"double",&eps);
  op_decl_const2("mach",1,"double",&mach);
  op_decl_const2("alpha",1,"double",&alpha);
  op_decl_const2("qinf",4,"double",qinf);

  op_diagnostic_output();

  double g_ncell = op_get_size(cells);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

    double* tmp_elem = NULL;
    op_dat p_res   = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res");
    op_dat p_adt   = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt");
    op_dat p_qold  = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold");

    // save old flow solution

    op_par_loop_save_soln("save_soln",cells,
                op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE));

    // predictor/corrector update loop

    for(int k=0; k<2; k++) {

      // calculate area/timstep

      op_par_loop_adt_calc("adt_calc",cells,
                  op_arg_dat(p_x,0,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,2,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,3,pcell,2,"double",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                  op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE));

      // calculate flux residual

      op_par_loop_res_calc("res_calc",edges,
                  op_arg_dat(p_x,0,pedge,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pedge,2,"double",OP_READ),
                  op_arg_dat(p_q,0,pecell,4,"double",OP_READ),
                  op_arg_dat(p_q,1,pecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,0,pecell,1,"double",OP_READ),
                  op_arg_dat(p_adt,1,pecell,1,"double",OP_READ),
                  op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
                  op_arg_dat(p_res,1,pecell,4,"double",OP_INC));

      op_par_loop_bres_calc("bres_calc",bedges,
                  op_arg_dat(p_x,0,pbedge,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pbedge,2,"double",OP_READ),
                  op_arg_dat(p_q,0,pbecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ),
                  op_arg_dat(p_res,0,pbecell,4,"double",OP_INC),
                  op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ));

      // update flow field

      rms = 0.0;

      op_par_loop_update("update",cells,
                  op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE),
                  op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
                  op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ),
                  op_arg_gbl(&rms,1,"double",OP_INC));
    }

    // print iteration history
    rms = sqrt(rms/(double)g_ncell );
    if (iter%100 == 0)
      op_printf(" %d  %10.5e \n",iter,rms);

    if (iter%1000 == 0 && g_ncell == 720000){ //defailt mesh -- for validation testing
      //op_printf(" %d  %3.16f \n",iter,rms);
      double diff=fabs((100.0*(rms/0.0001060114637578))-100.0);
      op_printf("\n\nTest problem with %d cells is within %3.15E %% of the expected solution\n",720000, diff);
      if(diff < 0.00001) {
        op_printf("This test is considered PASSED\n");
      }
      else {
        op_printf("This test is considered FAILED\n");
      }
    }

    if (op_free_dat_temp(p_res) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name);
    if (op_free_dat_temp(p_adt) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name);
    if (op_free_dat_temp(p_qold) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);

  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
示例#5
0
/*
 * Single-precision airfoil driver that reads the mesh from the text file
 * "./new_grid.dat".
 *
 * Steps: read the grid, set the constants and the initial flow field,
 * declare sets/maps/dats/constants, run 1000 iterations of two
 * predictor/corrector stages each, print timing, release host memory.
 *
 * The process exits with -1 if the grid file cannot be opened or parsed,
 * if it announces a negative set size, or if a host array cannot be
 * allocated.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  float  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  float  rms;

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // read in grid

  op_printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("./new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) {
    op_printf("error reading from new_grid.dat\n"); exit(-1);
  }

  // the sizes come straight from the file: refuse values that would turn
  // into enormous unsigned allocation sizes
  if (nnode < 0 || ncell < 0 || nedge < 0 || nbedge < 0) {
    op_printf("error: negative set size in new_grid.dat\n"); exit(-1);
  }

  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (float *) malloc(2*nnode*sizeof(float));
  q      = (float *) malloc(4*ncell*sizeof(float));
  qold   = (float *) malloc(4*ncell*sizeof(float));
  res    = (float *) malloc(4*ncell*sizeof(float));
  adt    = (float *) malloc(  ncell*sizeof(float));

  // malloc(0) may legitimately return NULL, so only arrays that must hold
  // at least one element are required to be non-NULL
  if ((ncell  > 0 && (!cell || !q || !qold || !res || !adt)) ||
      (nedge  > 0 && (!edge || !ecell)) ||
      (nbedge > 0 && (!bedge || !becell || !bound)) ||
      (nnode  > 0 && !x)) {
    op_printf("error allocating memory for grid data\n"); exit(-1);
  }

  // node coordinates
  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // cell -> node connectivity
  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
                                   &cell[4*n+2], &cell[4*n+3]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // edge -> node and edge -> cell connectivity
  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
                                   &ecell[2*n],&ecell[2*n+1]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // boundary edge -> node, boundary edge -> cell, and boundary flag
  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
                                   &becell[n], &bound[n]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  float mach  = 0.4f;
  float alpha = 3.0f*atan(1.0f)/45.0f;
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(gam*p/r)*mach;
  float e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  // every cell starts from the free-stream state with a zero residual
  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
        q[4*n+m] = qinf[m];
      res[4*n+m] = 0.0f;
    }
  }

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"float",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"float",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"float",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"float",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"float",res  ,"p_res");

  op_decl_const2("gam",1,"float",&gam);
  op_decl_const2("gm1",1,"float",&gm1);
  op_decl_const2("cfl",1,"float",&cfl);
  op_decl_const2("eps",1,"float",&eps);
  op_decl_const2("mach",1,"float",&mach);
  op_decl_const2("alpha",1,"float",&alpha);
  op_decl_const2("qinf",4,"float",qinf);

  op_diagnostic_output();

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

    // save old flow solution

    op_par_loop_save_soln("save_soln",cells,
                op_arg_dat(p_q,-1,OP_ID,4,"float",OP_READ),
                op_arg_dat(p_qold,-1,OP_ID,4,"float",OP_WRITE));

    // predictor/corrector update loop

    for(int k=0; k<2; k++) {

      // calculate area/timstep

      op_par_loop_adt_calc("adt_calc",cells,
                  op_arg_dat(p_x,0,pcell,2,"float",OP_READ),
                  op_arg_dat(p_x,1,pcell,2,"float",OP_READ),
                  op_arg_dat(p_x,2,pcell,2,"float",OP_READ),
                  op_arg_dat(p_x,3,pcell,2,"float",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"float",OP_READ),
                  op_arg_dat(p_adt,-1,OP_ID,1,"float",OP_WRITE));

      // calculate flux residual

      op_par_loop_res_calc("res_calc",edges,
                  op_arg_dat(p_x,0,pedge,2,"float",OP_READ),
                  op_arg_dat(p_x,1,pedge,2,"float",OP_READ),
                  op_arg_dat(p_q,0,pecell,4,"float",OP_READ),
                  op_arg_dat(p_q,1,pecell,4,"float",OP_READ),
                  op_arg_dat(p_adt,0,pecell,1,"float",OP_READ),
                  op_arg_dat(p_adt,1,pecell,1,"float",OP_READ),
                  op_arg_dat(p_res,0,pecell,4,"float",OP_INC),
                  op_arg_dat(p_res,1,pecell,4,"float",OP_INC));

      op_par_loop_bres_calc("bres_calc",bedges,
                  op_arg_dat(p_x,0,pbedge,2,"float",OP_READ),
                  op_arg_dat(p_x,1,pbedge,2,"float",OP_READ),
                  op_arg_dat(p_q,0,pbecell,4,"float",OP_READ),
                  op_arg_dat(p_adt,0,pbecell,1,"float",OP_READ),
                  op_arg_dat(p_res,0,pbecell,4,"float",OP_INC),
                  op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ));

      // update flow field

      rms = 0.0;

      op_par_loop_update("update",cells,
                  op_arg_dat(p_qold,-1,OP_ID,4,"float",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"float",OP_WRITE),
                  op_arg_dat(p_res,-1,OP_ID,4,"float",OP_RW),
                  op_arg_dat(p_adt,-1,OP_ID,1,"float",OP_READ),
                  op_arg_gbl(&rms,1,"float",OP_INC));
    }

    // print iteration history
    rms = sqrt(rms/(float) op_get_size(cells));
    if (iter%100 == 0)
      op_printf(" %d  %10.5e \n",iter,rms);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1);

  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
示例#6
0
/*
 * Reduction test driver.
 *
 * Reads the airfoil grid from "./new_grid.dat", declares the edge and cell
 * sets, the edge->cell map and the p_res dataset, and then runs two loops
 * that each accumulate an integer counter through an OP_INC global:
 *   - res_calc over edges (p_res accessed through pecell); the counter is
 *     compared with nedge,
 *   - update over cells (p_res accessed directly); the counter is compared
 *     with ncell.
 * PASSED/FAILED is printed for each comparison.
 *
 * The process exits with -1 if the grid file cannot be opened or parsed,
 * if it announces a negative set size, or if a host array cannot be
 * allocated.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  double  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge;

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // read in airfoil grid

  op_printf("reading in data \n");

  FILE *fp;
  if ( (fp = fopen("./new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) {
    op_printf("error reading from new_grid.dat\n"); exit(-1);
  }

  // the sizes come straight from the file: refuse values that would turn
  // into enormous unsigned allocation sizes
  if (nnode < 0 || ncell < 0 || nedge < 0 || nbedge < 0) {
    op_printf("error: negative set size in new_grid.dat\n"); exit(-1);
  }

  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (double *) malloc(2*nnode*sizeof(double));
  q      = (double *) malloc(4*ncell*sizeof(double));
  qold   = (double *) malloc(4*ncell*sizeof(double));
  res    = (double *) malloc(4*ncell*sizeof(double));
  adt    = (double *) malloc(  ncell*sizeof(double));

  // malloc(0) may legitimately return NULL, so only arrays that must hold
  // at least one element are required to be non-NULL
  if ((ncell  > 0 && (!cell || !q || !qold || !res || !adt)) ||
      (nedge  > 0 && (!edge || !ecell)) ||
      (nbedge > 0 && (!bedge || !becell || !bound)) ||
      (nnode  > 0 && !x)) {
    op_printf("error allocating memory for grid data\n"); exit(-1);
  }

  // res becomes the p_res dataset, which the loops below access with
  // OP_INC and OP_RW; give it defined contents instead of whatever malloc
  // returned
  for (int n=0; n<4*ncell; n++) {
    res[n] = 0.0;
  }

  // node coordinates
  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // cell -> node connectivity
  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
        &cell[4*n+2], &cell[4*n+3]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // edge -> node and edge -> cell connectivity
  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
        &ecell[2*n],&ecell[2*n+1]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // boundary edge -> node, boundary edge -> cell, and boundary flag
  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
        &becell[n], &bound[n]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // declare sets, pointers, datasets

  op_set edges  = op_decl_set(nedge,  "edges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  int count;

  op_diagnostic_output();

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  //indirect reduction
  count = 0;
  op_par_loop_res_calc("res_calc",edges,
              op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
              op_arg_gbl(&count,1,"int",OP_INC));
  op_printf("number of edges:: %d should be: %d \n",count,nedge);
  if (count != nedge) op_printf("indirect reduction FAILED\n");
  else op_printf("indirect reduction PASSED\n");
  //direct reduction
  count = 0;
  op_par_loop_update("update",cells,
              op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
              op_arg_gbl(&count,1,"int",OP_INC));
  op_printf("number of cells: %d should be: %d \n",count,ncell);
  if (count != ncell) op_printf("direct reduction FAILED\n");
  else op_printf("direct reduction PASSED\n");

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();

  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
示例#7
0
/*
 * Jacobi-iteration driver for MPI.
 *
 * The process whose rank equals MPI_ROOT builds the global edge list,
 * matrix entries and vectors for an (NN-1) x (NN-1) grid; the arrays are
 * then scattered to all processes, declared to OP, partitioned, and
 * iterated NITER times with the "res" and "update" kernels.  The resulting
 * p_u data is written to "out_grid.dat" and timing information is printed.
 */
int main(int argc, char **argv)
{
  // index offsets of the four neighbours visited per grid point:
  // i-1, i+1, j-1, j+1
  static const int step_i[4] = {-1, 1,  0, 0};
  static const int step_j[4] = { 0, 0, -1, 1};

  // OP initialisation
  op_init(argc,argv,2);

  // MPI rank and size, used for the user-side I/O below
  int rank;
  int nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  double cpu_start, cpu_end, wall_start, wall_end;

  /**------------------------BEGIN I/O and PARTITIONING ---------------------**/

  const int total_nodes = (NN-1)*(NN-1);
  const int total_edges = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2);

  // global arrays; they stay null on every process that does not build them
  int    *all_pp = 0;
  double *all_A = 0, *all_r = 0, *all_u = 0, *all_du = 0;

  op_printf("Global number of nodes, edges = %d, %d\n",total_nodes,total_edges);

  if (rank == MPI_ROOT) {
    all_pp = (int *)malloc(sizeof(int)*2*total_edges);

    all_A  = (double *)malloc(sizeof(double)*total_edges);
    all_r  = (double *)malloc(sizeof(double)*total_nodes);
    all_u  = (double *)malloc(sizeof(double)*total_nodes);
    all_du = (double *)malloc(sizeof(double)*total_nodes);

    // build the matrix entries and the right-hand side
    int edge_idx = 0;

    for (int i=1; i<NN; i++) {
      for (int j=1; j<NN; j++) {
        const int node_idx = i-1 + (j-1)*(NN-1);

        all_r[node_idx]  = 0.0f;
        all_u[node_idx]  = 0.0f;
        all_du[node_idx] = 0.0f;

        // self edge
        all_pp[2*edge_idx]   = node_idx;
        all_pp[2*edge_idx+1] = node_idx;
        all_A[edge_idx]      = -1.0f;
        edge_idx++;

        for (int dir=0; dir<4; dir++) {
          const int ni = i + step_i[dir];
          const int nj = j + step_j[dir];
          const int outside = (ni==0) || (ni==NN) || (nj==0) || (nj==NN);

          if (outside) {
            // neighbour index is 0 or NN: add to the right-hand side
            all_r[node_idx] += 0.25f;
            continue;
          }

          // otherwise add an edge to the neighbouring node
          all_pp[2*edge_idx]   = node_idx;
          all_pp[2*edge_idx+1] = ni-1 + (nj-1)*(NN-1);
          all_A[edge_idx]      = 0.25f;
          edge_idx++;
        }
      }
    }
  }

  /* Compute local sizes */
  int local_nodes = compute_local_size (total_nodes, nprocs, rank);
  int local_edges = compute_local_size (total_edges, nprocs, rank);
  op_printf("Number of nodes, edges on process %d = %d, %d\n"
      ,rank,local_nodes,local_edges);

  /* Allocate memory to hold local sets, mapping tables and data */
  int    *pp = (int *)malloc(2*sizeof(int)*local_edges);

  double *A  = (double *) malloc(local_edges*sizeof(double));
  double *r  = (double *) malloc(local_nodes*sizeof(double));
  double *u  = (double *) malloc(local_nodes*sizeof(double));
  double *du = (double *) malloc(local_nodes*sizeof(double));

  /* Scatter sets, mappings and data on sets */
  scatter_int_array(all_pp, pp, nprocs, total_edges,local_edges, 2);
  scatter_double_array(all_A, A, nprocs, total_edges,local_edges, 1);
  scatter_double_array(all_r, r, nprocs, total_nodes,local_nodes, 1);
  scatter_double_array(all_u, u, nprocs, total_nodes,local_nodes, 1);
  scatter_double_array(all_du, du, nprocs, total_nodes,local_nodes, 1);

  /* The global arrays are no longer needed once they have been scattered */
  if (rank == MPI_ROOT) {
    free(all_pp);
    free(all_A);
    free(all_r);
    free(all_u);
    free(all_du);
  }

  /**------------------------END I/O and PARTITIONING ---------------------**/

  // declare sets, pointers, and datasets

  op_set nodes = op_decl_set(local_nodes,"nodes");
  op_set edges = op_decl_set(local_edges,"edges");

  op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge");

  op_dat p_A = op_decl_dat(edges,1,"double", A,  "p_A" );
  op_dat p_r = op_decl_dat(nodes,1,"double", r,  "p_r" );
  op_dat p_u = op_decl_dat(nodes,1,"double", u,  "p_u" );
  op_dat p_du = op_decl_dat(nodes,1,"double", du,"p_du");

  alpha = 1.0f;
  op_decl_const(1,"double",&alpha);

  op_diagnostic_output();

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL);

  // start of the timed region
  op_timers(&cpu_start, &wall_start);

  // main iteration loop

  double u_sum, u_max, beta = 1.0f;

  for (int it=0; it<NITER; it++) {
    op_par_loop(res,"res", edges,
        op_arg_dat(p_A,  -1,OP_ID,  1,"double", OP_READ),
        op_arg_dat(p_u,   1,ppedge, 1,"double", OP_READ),
        op_arg_dat(p_du,  0,ppedge, 1,"double", OP_INC),
        op_arg_gbl(&beta, 1,"double", OP_READ));

    // both reduction targets are reset before every update loop
    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop(update,"update", nodes,
        op_arg_dat(p_r,   -1,OP_ID, 1,"double",OP_READ),
        op_arg_dat(p_du,  -1,OP_ID, 1,"double",OP_RW),
        op_arg_dat(p_u,   -1,OP_ID, 1,"double",OP_INC),
        op_arg_gbl(&u_sum,1,"double",OP_INC),
        op_arg_gbl(&u_max,1,"double",OP_MAX));

    op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/total_nodes));
  }

  // end of the timed region
  op_timers(&cpu_end, &wall_end);

  // get results data array
  op_dat result = op_mpi_get_data(p_u);

  // write the result dat to an ASCII file
  print_dat_tofile(result, "out_grid.dat");

  // print each MPI process's timing info for each kernel
  op_timing_output();

  // print total time for the iterations
  op_printf("Max total runtime = %f\n",wall_end-wall_start);
  op_exit();
}
/*
 * Host-side stub for the "adt_calc" parallel loop (indirect access).
 *
 * name        kernel name: handed to op_plan_get and stored in OP_kernels[1]
 * set         set the loop iterates over
 * arg0..arg3  arguments reached through indirect dataset 0 (inds[] == 0)
 * arg4, arg5  arguments accessed directly (inds[] == -1)
 *
 * Steps: exchange halos for every dataset argument, fetch the execution
 * plan, run the plan colour by colour, set the dirty bit on the dataset
 * arguments, and add the elapsed wall time and the plan's transfer figures
 * to the kernel record. The timers are started after the halo exchange and
 * the plan lookup, so neither is included in the recorded time.
 */
void op_par_loop_adt_calc(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4,
  op_arg arg5 ){

  int nargs   = 6;
  op_arg args[6] = {arg0,arg1,arg2,arg3,arg4,arg5};

  int    ninds   = 1;
  int    inds[6] = {0,0,0,0,-1,-1};

  if(ninds > 0) //indirect loop
  {
      for(int i = 0; i<nargs; i++)
      {
          if(args[i].argtype == OP_ARG_DAT)
          {
              if (OP_diags==1) reset_halo(args[i]);
              // each exchange is waited on before the next one is started,
              // so a single flag is enough
              int sent = exchange_halo(args[i]);
              if(sent == 1) wait_all(args[i]);
          }
      }
  }

  if (OP_diags>2) {
    printf(" kernel routine with indirection: adt_calc \n");
  }

  // get plan

  #ifdef OP_PART_SIZE_1
    int part_size = OP_PART_SIZE_1;
  #else
    int part_size = OP_part_size;
  #endif

  op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);

  // initialise timers

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // execute plan: colours are processed one after another, the blocks of
  // one colour are distributed over the OpenMP threads

  int block_offset = 0;

  for (int col=0; col < Plan->ncolors; col++) {
    int nblocks = Plan->ncolblk[col];

#pragma omp parallel for
    for (int blockIdx=0; blockIdx<nblocks; blockIdx++)
     op_x86_adt_calc( blockIdx,
       (double *)arg0.data, Plan->ind_maps[0],
       Plan->loc_maps[0],
       Plan->loc_maps[1],
       Plan->loc_maps[2],
       Plan->loc_maps[3],
       (double *)arg4.data,
       (double *)arg5.data,
       Plan->ind_sizes,
       Plan->ind_offs,
       block_offset,
       Plan->blkmap,
       Plan->offset,
       Plan->nelems,
       Plan->nthrcol,
       Plan->thrcol);

    // blocks of the next colour follow directly in the plan's block map
    block_offset += nblocks;
  }

  //set dirty bit on direct/indirect datasets with access OP_INC,OP_WRITE, OP_RW
  for(int i = 0; i<nargs; i++)
      if(args[i].argtype == OP_ARG_DAT)
        set_dirtybit(args[i]);

  //perform any global operations
  // - NONE

  // update kernel record

  op_timers(&cpu_t2, &wall_t2);
  op_timing_realloc(1);
  OP_kernels[1].name      = name;
  OP_kernels[1].count    += 1;
  OP_kernels[1].time     += wall_t2 - wall_t1;
  OP_kernels[1].transfer  += Plan->transfer;
  OP_kernels[1].transfer2 += Plan->transfer2;
}
示例#9
0
int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 5);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  int nnode, nedge, n, e;

  nnode = (NN - 1) * (NN - 1);
  nedge = (NN - 1) * (NN - 1) + 4 * (NN - 1) * (NN - 2);

  int *pp = (int *)malloc(sizeof(int) * 2 * nedge);

  double *A = (double *)malloc(sizeof(double) * nedge);
  double *r = (double *)malloc(sizeof(double) * nnode);
  double *u = (double *)malloc(sizeof(double) * nnode);
  double *du = (double *)malloc(sizeof(double) * nnode);

  // create matrix and r.h.s., and set coordinates needed for renumbering /
  // partitioning

  e = 0;

  for (int i = 1; i < NN; i++) {
    for (int j = 1; j < NN; j++) {
      n = i - 1 + (j - 1) * (NN - 1);
      r[n] = 0.0f;
      u[n] = 0.0f;
      du[n] = 0.0f;

      pp[2 * e] = n;
      pp[2 * e + 1] = n;
      A[e] = -1.0f;
      e++;

      for (int pass = 0; pass < 4; pass++) {
        int i2 = i;
        int j2 = j;
        if (pass == 0)
          i2 += -1;
        if (pass == 1)
          i2 += 1;
        if (pass == 2)
          j2 += -1;
        if (pass == 3)
          j2 += 1;

        if ((i2 == 0) || (i2 == NN) || (j2 == 0) || (j2 == NN)) {
          r[n] += 0.25f;
        } else {
          pp[2 * e] = n;
          pp[2 * e + 1] = i2 - 1 + (j2 - 1) * (NN - 1);
          A[e] = 0.25f;
          e++;
        }
      }
    }
  }

  // declare sets, pointers, and datasets

  op_set nodes = op_decl_set(nnode, "nodes");
  op_set edges = op_decl_set(nedge, "edges");

  op_map ppedge = op_decl_map(edges, nodes, 2, pp, "ppedge");

  op_dat p_A = op_decl_dat(edges, 1, "double", A, "p_A");
  op_dat p_r = op_decl_dat(nodes, 1, "double", r, "p_r");
  op_dat p_u = op_decl_dat(nodes, 1, "double", u, "p_u");
  op_dat p_du = op_decl_dat(nodes, 1, "double", du, "p_du");

  alpha = 1.0f;
  op_decl_const2("alpha", 1, "double", &alpha);

  op_diagnostic_output();

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main iteration loop

  double u_sum, u_max, beta = 1.0f;

  for (int iter = 0; iter < NITER; iter++) {
    op_par_loop_res("res", edges,
                    op_arg_dat(p_A, -1, OP_ID, 1, "double", OP_READ),
                    op_arg_dat(p_u, 1, ppedge, 1, "double", OP_READ),
                    op_arg_dat(p_du, 0, ppedge, 1, "double", OP_INC),
                    op_arg_gbl(&beta, 1, "double", OP_READ));

    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop_update("update", nodes,
                       op_arg_dat(p_r, -1, OP_ID, 1, "double", OP_READ),
                       op_arg_dat(p_du, -1, OP_ID, 1, "double", OP_RW),
                       op_arg_dat(p_u, -1, OP_ID, 1, "double", OP_INC),
                       op_arg_gbl(&u_sum, 1, "double", OP_INC),
                       op_arg_gbl(&u_max, 1, "double", OP_MAX));
    op_printf("\n u max/rms = %f %f \n\n", u_max, sqrt(u_sum / nnode));
  }

  op_timers(&cpu_t2, &wall_t2);

  // print out results
  op_printf("\n  Results after %d iterations:\n\n", NITER);

  op_fetch_data(p_u, u);

  for (int pass = 0; pass < 1; pass++) {
    for (int j = NN - 1; j > 0; j--) {
      for (int i = 1; i < NN; i++) {
        if (pass == 0)
          op_printf(" %7.4f", u[i - 1 + (j - 1) * (NN - 1)]);
        else if (pass == 1)
          op_printf(" %7.4f", du[i - 1 + (j - 1) * (NN - 1)]);
        else if (pass == 2)
          op_printf(" %7.4f", r[i - 1 + (j - 1) * (NN - 1)]);
      }
      op_printf("\n");
    }
    op_printf("\n");
  }

  op_timing_output();

  // print total time for niter interations
  op_printf("Max total runtime = %f\n", wall_t2 - wall_t1);

  int result = check_result<double>(u, NN, TOLERANCE);
  op_exit();

  free(pp);
  free(A);
  free(u);
  free(du);
  free(r);

  return result;
}
示例#10
0
/*
 * Host-side stub for the direct "update" loop (double precision).
 *
 * name        kernel name stored in OP_kernels[4]
 * set         set the loop iterates over; split evenly between threads
 * arg0..arg3  arguments whose data pointers are passed to the kernel as is
 * arg4        global value that receives a sum reduction: every thread
 *             accumulates into its own slot and the slots are added to
 *             arg4.data afterwards
 *
 * After the loop the dirty bit is set on every dataset argument and
 * global_reduce is called on every global argument.
 */
void op_par_loop_update(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4 ){

  int nargs   = 5;
  op_arg args[5] = {arg0,arg1,arg2,arg3,arg4};

  double *arg4h = (double *)arg4.data;

  if (OP_diags>2) {
    printf(" kernel routine w/o indirection:  update \n");
  }

  // initialise timers

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // set number of threads

#ifdef _OPENMP
  int nthreads = omp_get_max_threads( );
#else
  int nthreads = 1;
#endif

  // The reduction scratch array below provides 64 slots of 64 doubles.
  // Cap the number of work chunks so that thr*64 can never index past it
  // on machines that report more threads than that.
  if (nthreads > 64) nthreads = 64;

  // allocate and initialise arrays for global reduction

  double arg4_l[1+64*64];
  for (int thr=0; thr<nthreads; thr++)
    for (int d=0; d<1; d++) arg4_l[d+thr*64]=ZERO_double;

  // execute plan: chunk thr covers elements [start, finish)

#pragma omp parallel for
  for (int thr=0; thr<nthreads; thr++) {
    int start  = (set->size* thr   )/nthreads;
    int finish = (set->size*(thr+1))/nthreads;
    op_x86_update( (double *) arg0.data,
                   (double *) arg1.data,
                   (double *) arg2.data,
                   (double *) arg3.data,
                   arg4_l + thr*64,
                   start, finish );
  }

  // combine reduction data

  for (int thr=0; thr<nthreads; thr++)
    for(int d=0; d<1; d++) arg4h[d] += arg4_l[d+thr*64];

  //set dirty bit on direct/indirect datasets with access OP_INC,OP_WRITE, OP_RW
  for(int i = 0; i<nargs; i++)
      if(args[i].argtype == OP_ARG_DAT)
        set_dirtybit(args[i]);

  //perform any global operations
  for(int i = 0; i<nargs; i++)
      if(args[i].argtype == OP_ARG_GBL)
        global_reduce(&args[i]);

  // update kernel record

  op_timers(&cpu_t2, &wall_t2);
  op_timing_realloc(4);
  OP_kernels[4].name      = name;
  OP_kernels[4].count    += 1;
  OP_kernels[4].time     += wall_t2 - wall_t1;
  OP_kernels[4].transfer += (double)set->size * arg0.size;
  OP_kernels[4].transfer += (double)set->size * arg1.size;
  OP_kernels[4].transfer += (double)set->size * arg2.size * 2.0f;
  OP_kernels[4].transfer += (double)set->size * arg3.size;
}
示例#11
0
void bres_calc_host(const char *userSubroutine,op_set set,op_arg opDat1,op_arg opDat2,op_arg opDat3,op_arg opDat4,op_arg opDat5,op_arg opDat6)
{
  size_t blocksPerGrid;
  size_t threadsPerBlock;
  size_t totalThreadNumber;
  size_t dynamicSharedMemorySize;
  cl_int errorCode;
  cl_event event;
  cl_kernel kernelPointer;
  int i3;
  op_arg opDatArray[6];
  int indirectionDescriptorArray[6];
  op_plan *planRet;
  int blockOffset;
  opDatArray[0] = opDat1;
  opDatArray[1] = opDat2;
  opDatArray[2] = opDat3;
  opDatArray[3] = opDat4;
  opDatArray[4] = opDat5;
  opDatArray[5] = opDat6;
  indirectionDescriptorArray[0] = 0;
  indirectionDescriptorArray[1] = 0;
  indirectionDescriptorArray[2] = 1;
  indirectionDescriptorArray[3] = 2;
  indirectionDescriptorArray[4] = 3;
  indirectionDescriptorArray[5] = -1;
  planRet = op_plan_get(userSubroutine,set,setPartitionSize_bres_calc,6,opDatArray,4,indirectionDescriptorArray);
  cl_mem gm1_d;
  gm1_d = op_allocate_constant(&gm1,sizeof(float ));
  cl_mem qinf_d;
  qinf_d = op_allocate_constant(&qinf,4 * sizeof(float));
  cl_mem eps_d;
  eps_d = op_allocate_constant(&eps,sizeof(float ));
  blockOffset = 0;
  double cpu_t1;
  double cpu_t2;
  double wall_t1;
op_timers(&cpu_t1, &wall_t1);
  double wall_t2;
  for (i3 = 0; i3 < planRet -> ncolors; ++i3) {
    blocksPerGrid = planRet -> ncolblk[i3];
    dynamicSharedMemorySize = planRet -> nshared;
    threadsPerBlock = threadsPerBlockSize_bres_calc;
    totalThreadNumber = threadsPerBlock * blocksPerGrid;
    kernelPointer = getKernel("bres_calc_kernel");
    errorCode = clSetKernelArg(kernelPointer,0,sizeof(cl_mem ),&opDat1.data_d);
    errorCode = errorCode | clSetKernelArg(kernelPointer,1,sizeof(cl_mem ),&opDat3.data_d);
    errorCode = errorCode | clSetKernelArg(kernelPointer,2,sizeof(cl_mem ),&opDat4.data_d);
    errorCode = errorCode | clSetKernelArg(kernelPointer,3,sizeof(cl_mem ),&opDat5.data_d);
    errorCode = errorCode | clSetKernelArg(kernelPointer,4,sizeof(cl_mem ),&opDat6.data_d);
    errorCode = errorCode | clSetKernelArg(kernelPointer,5,sizeof(cl_mem ),&planRet -> ind_maps[0]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,6,sizeof(cl_mem ),&planRet -> ind_maps[1]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,7,sizeof(cl_mem ),&planRet -> ind_maps[2]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,8,sizeof(cl_mem ),&planRet -> ind_maps[3]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,9,sizeof(cl_mem ),&planRet -> loc_maps[0]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,10,sizeof(cl_mem ),&planRet -> loc_maps[1]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,11,sizeof(cl_mem ),&planRet -> loc_maps[2]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,12,sizeof(cl_mem ),&planRet -> loc_maps[3]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,13,sizeof(cl_mem ),&planRet -> loc_maps[4]);
    errorCode = errorCode | clSetKernelArg(kernelPointer,14,sizeof(cl_mem ),&planRet -> ind_sizes);
    errorCode = errorCode | clSetKernelArg(kernelPointer,15,sizeof(cl_mem ),&planRet -> ind_offs);
    errorCode = errorCode | clSetKernelArg(kernelPointer,16,sizeof(cl_mem ),&planRet -> blkmap);
    errorCode = errorCode | clSetKernelArg(kernelPointer,17,sizeof(cl_mem ),&planRet -> offset);
    errorCode = errorCode | clSetKernelArg(kernelPointer,18,sizeof(cl_mem ),&planRet -> nelems);
    errorCode = errorCode | clSetKernelArg(kernelPointer,19,sizeof(cl_mem ),&planRet -> nthrcol);
    errorCode = errorCode | clSetKernelArg(kernelPointer,20,sizeof(cl_mem ),&planRet -> thrcol);
    errorCode = errorCode | clSetKernelArg(kernelPointer,21,sizeof(int ),&blockOffset);
    errorCode = errorCode | clSetKernelArg(kernelPointer,22,dynamicSharedMemorySize,NULL);
    errorCode = errorCode | clSetKernelArg(kernelPointer,23,sizeof(cl_mem ),&gm1_d);
    errorCode = errorCode | clSetKernelArg(kernelPointer,24,sizeof(cl_mem ),&qinf_d);
    errorCode = errorCode | clSetKernelArg(kernelPointer,25,sizeof(cl_mem ),&eps_d);
    assert_m(errorCode == CL_SUCCESS,"Error setting OpenCL kernel arguments");
    errorCode = clEnqueueNDRangeKernel(cqCommandQueue,kernelPointer,1,NULL,&totalThreadNumber,&threadsPerBlock,0,NULL,&event);
    assert_m(errorCode == CL_SUCCESS,"Error executing OpenCL kernel");
    errorCode = clFinish(cqCommandQueue);
    assert_m(errorCode == CL_SUCCESS,"Error completing device command queue");
    blockOffset += blocksPerGrid;
  }
op_timers(&cpu_t2, &wall_t2);
op_timing_realloc(0);
  OP_kernels[1].name = userSubroutine;
  OP_kernels[1].count = OP_kernels[1].count + 1;
}
//#define AUTO_BLOCK_SIZE
void op_par_loop_save_soln(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1 ){
  
  cl_int ciErrNum;
  cl_event ceEvent;



  if (OP_diags>2) {
    printf(" kernel routine w/o indirection:  save_soln \n");
  }

  // initialise timers

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // set CUDA execution parameters

#ifdef AUTO_BLOCK_SIZE
    const size_t nthread = 1024;
#else
  #ifdef OP_BLOCK_SIZE_0
    const size_t nthread = OP_BLOCK_SIZE_0;
  #else
    // int nthread = OP_block_size;
    const size_t nthread = 128;
  #endif
#endif

  const size_t nblocks = 200;
  const size_t n_tot_thread = nblocks * nthread;

  // work out shared memory requirements per element

  int nshared = 0;
  nshared = MAX(nshared,sizeof(float)*4);
  nshared = MAX(nshared,sizeof(float)*4);

  // execute plan

  int offset_s = nshared*OP_WARPSIZE;

  nshared = nshared*nthread;


  cl_kernel hKernel = getKernel( "op_cuda_save_soln" );

  //nshared *= 4;
  //offset_s *= 4;

  int i = 0;
  ciErrNum = clSetKernelArg( hKernel, i++, sizeof(cl_mem), &(arg0.data_d) );
  ciErrNum |= clSetKernelArg( hKernel, i++, sizeof(cl_mem), &(arg1.data_d) );
  ciErrNum |= clSetKernelArg( hKernel, i++, sizeof(int), &offset_s );
  ciErrNum |= clSetKernelArg( hKernel, i++, sizeof(int), &set->size );
  ciErrNum |= clSetKernelArg( hKernel, i++, nshared, NULL );
  assert_m( ciErrNum == CL_SUCCESS, "error setting kernel arguments" );

#ifdef AUTO_BLOCK_SIZE
  ciErrNum = clEnqueueNDRangeKernel( cqCommandQueue, hKernel, 1, NULL, &n_tot_thread, NULL, 0, NULL, &ceEvent );
#else
  ciErrNum = clEnqueueNDRangeKernel( cqCommandQueue, hKernel, 1, NULL, &n_tot_thread, &nthread, 0, NULL, &ceEvent );
#endif
  assert_m( ciErrNum == CL_SUCCESS, "error executing kernel" );

#ifndef ASYNC
  ciErrNum = clFinish( cqCommandQueue );
  assert_m( ciErrNum == CL_SUCCESS, "error completing device commands" );

#ifdef PROFILE
  unsigned long tqueue, tsubmit, tstart, tend, telapsed;
  ciErrNum  = clGetEventProfilingInfo( ceEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(tqueue), &tqueue, NULL );
  ciErrNum |= clGetEventProfilingInfo( ceEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(tsubmit), &tsubmit, NULL );
  ciErrNum |= clGetEventProfilingInfo( ceEvent, CL_PROFILING_COMMAND_START, sizeof(tstart), &tstart, NULL );
  ciErrNum |= clGetEventProfilingInfo( ceEvent, CL_PROFILING_COMMAND_END, sizeof(tend), &tend, NULL );
  assert_m( ciErrNum == CL_SUCCESS, "error getting profiling info" );
  OP_kernels[0].queue_time      += (tsubmit - tqueue);
  OP_kernels[0].wait_time       += (tstart - tsubmit);
  OP_kernels[0].execution_time  += (tend - tstart);
  //printf("%20lu\n%20lu\n%20lu\n%20lu\n\n", tqueue, tsubmit, tstart, tend);
  //printf("queue: %8.4f\nwait:%8.4f\nexec: %8.4f\n\n", OP_kernels[0].queue_time * 1.0e-9, OP_kernels[0].wait_time * 1.0e-9, OP_kernels[0].execution_time * 1.0e-9 );
#endif

  // update kernel record

  op_timers(&cpu_t2, &wall_t2);
  op_timing_realloc(0);
  OP_kernels[0].name      = name;
  OP_kernels[0].count    += 1;
  OP_kernels[0].time     += wall_t2 - wall_t1;
  OP_kernels[0].transfer += (float)set->size * arg0.size;
  OP_kernels[0].transfer += (float)set->size * arg1.size;
#endif
}
示例#13
0
/*
 * Host-side stub for the direct "update" loop (single precision).
 *
 * name        kernel name stored in OP_kernels[1]
 * set         set the loop iterates over; split evenly between threads
 * arg0..arg2  arguments whose data pointers are passed to the kernel as is
 * arg3        global value that receives a sum reduction
 * arg4        global value that receives a maximum reduction
 *
 * Every thread works on its own slot of the scratch arrays; the slots are
 * folded into arg3.data (added) and arg4.data (MAX) after the loop.
 */
void op_par_loop_update(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4 ){

  float *arg3h = (float *)arg3.data;
  float *arg4h = (float *)arg4.data;

  if (OP_diags>2) {
    printf(" kernel routine w/o indirection:  update \n");
  }

  // initialise timers

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // set number of threads

#ifdef _OPENMP
  int nthreads = omp_get_max_threads( );
#else
  int nthreads = 1;
#endif

  // The reduction scratch arrays below provide 64 slots of 64 floats.
  // Cap the number of work chunks so that thr*64 can never index past them
  // on machines that report more threads than that.
  if (nthreads > 64) nthreads = 64;

  // allocate and initialise arrays for global reduction

  // sum reduction: every slot starts from zero
  float arg3_l[1+64*64];
  for (int thr=0; thr<nthreads; thr++)
    for (int d=0; d<1; d++) arg3_l[d+thr*64]=ZERO_float;

  // max reduction: every slot starts from the caller's current value
  float arg4_l[1+64*64];
  for (int thr=0; thr<nthreads; thr++)
    for (int d=0; d<1; d++) arg4_l[d+thr*64]=arg4h[d];

  // execute plan: chunk thr covers elements [start, finish)

#pragma omp parallel for
  for (int thr=0; thr<nthreads; thr++) {
    int start  = (set->size* thr   )/nthreads;
    int finish = (set->size*(thr+1))/nthreads;
    op_x86_update( (float *) arg0.data,
                   (float *) arg1.data,
                   (float *) arg2.data,
                   arg3_l + thr*64,
                   arg4_l + thr*64,
                   start, finish );
  }

  // combine reduction data

  for (int thr=0; thr<nthreads; thr++)
    for(int d=0; d<1; d++) arg3h[d] += arg3_l[d+thr*64];

  for (int thr=0; thr<nthreads; thr++)
    for(int d=0; d<1; d++) arg4h[d]  = MAX(arg4h[d],arg4_l[d+thr*64]);

  // update kernel record

  op_timers(&cpu_t2, &wall_t2);
  op_timing_realloc(1);
  OP_kernels[1].name      = name;
  OP_kernels[1].count    += 1;
  OP_kernels[1].time     += wall_t2 - wall_t1;
  OP_kernels[1].transfer += (float)set->size * arg0.size;
  OP_kernels[1].transfer += (float)set->size * arg1.size * 2.0f;
  OP_kernels[1].transfer += (float)set->size * arg2.size * 2.0f;
}
示例#14
0
/*
 * Airfoil driver, HDF5 input variant.
 *
 * Sets the global flow constants, declares the sets, maps and datasets by
 * name from the file "new_grid.h5", then runs 1000 iterations. Each
 * iteration saves the current solution and performs two passes of
 * adt_calc / res_calc / bres_calc / update. Every 100th iteration the
 * iteration number and rms value are printed; timing information is
 * printed at the end.
 *
 * NOTE(review): the op_par_loop(...) call sites are presumably consumed by
 * the OP2 source-to-source translator, so their textual form should be kept
 * as is -- confirm before reformatting.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int    niter;
  double  rms;

  // wall-clock / cpu timestamps taken before and after the main loop
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // set constants and initialise flow field and residual
  op_printf("initialising flow field \n");

  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  // atan(1)/45 is pi/180, so this is 3 degrees expressed in radians
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  // reference state vector built from the values above
  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  char file[] = "new_grid.h5";//"new_grid-26mil.h5";

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set_hdf5(file, "nodes");
  op_set edges  = op_decl_set_hdf5(file,  "edges");
  op_set bedges = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file,  "cells");

  // maps: (from set, to set, entries per element, file, name)
  op_map pedge   = op_decl_map_hdf5(edges, nodes, 2, file, "pedge");
  op_map pecell  = op_decl_map_hdf5(edges, cells,2, file, "pecell");
  op_map pbedge  = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge");
  op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell");
  op_map pcell   = op_decl_map_hdf5(cells, nodes,4, file, "pcell");

  // datasets: (set, values per element, type name, file, name)
  op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int"  ,file,"p_bound");
  op_dat p_x     = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x");
  op_dat p_q     = op_decl_dat_hdf5(cells ,4,"double",file,"p_q");
  op_dat p_qold  = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold");
  op_dat p_adt   = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt");
  op_dat p_res   = op_decl_dat_hdf5(cells ,4,"double",file,"p_res");

  // mach and alpha are locals of main; their addresses stay valid for the
  // whole run because main does not return before op_exit
  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&cfl  );
  op_decl_const(1,"double",&eps  );
  op_decl_const(1,"double",&mach );
  op_decl_const(1,"double",&alpha);
  op_decl_const(4,"double",qinf  );

  op_diagnostic_output();

  // size of the cells set as reported by op_get_size; used to normalise rms
  int g_ncell = op_get_size(cells);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

    //  save old flow solution

    op_par_loop(save_soln,"save_soln", cells,
        op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_READ ),
        op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE));

    //  predictor/corrector update loop

    for(int k=0; k<2; k++) {

      //    calculate area/timestep

      op_par_loop(adt_calc,"adt_calc",cells,
          op_arg_dat(p_x,   0,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   1,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   2,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   3,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_q,  -1,OP_ID, 4,"double",OP_READ ),
          op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE));

      //    calculate flux residual

      op_par_loop(res_calc,"res_calc",edges,
          op_arg_dat(p_x,    0,pedge, 2,"double",OP_READ),
          op_arg_dat(p_x,    1,pedge, 2,"double",OP_READ),
          op_arg_dat(p_q,    0,pecell,4,"double",OP_READ),
          op_arg_dat(p_q,    1,pecell,4,"double",OP_READ),
          op_arg_dat(p_adt,  0,pecell,1,"double",OP_READ),
          op_arg_dat(p_adt,  1,pecell,1,"double",OP_READ),
          op_arg_dat(p_res,  0,pecell,4,"double",OP_INC ),
          op_arg_dat(p_res,  1,pecell,4,"double",OP_INC ));

      // same residual dataset, incremented from the boundary edges

      op_par_loop(bres_calc,"bres_calc",bedges,
          op_arg_dat(p_x,     0,pbedge, 2,"double",OP_READ),
          op_arg_dat(p_x,     1,pbedge, 2,"double",OP_READ),
          op_arg_dat(p_q,     0,pbecell,4,"double",OP_READ),
          op_arg_dat(p_adt,   0,pbecell,1,"double",OP_READ),
          op_arg_dat(p_res,   0,pbecell,4,"double",OP_INC ),
          op_arg_dat(p_bound,-1,OP_ID  ,1,"int",  OP_READ));

      //    update flow field

      // rms is reset on every pass, so after the k loop it holds the
      // reduction of the second pass only
      rms = 0.0;

      op_par_loop(update,"update",cells,
          op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ),
          op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_WRITE),
          op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW   ),
          op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ),
          op_arg_gbl(&rms,1,"double",OP_INC));
    }

    //  print iteration history

    rms = sqrt(rms/(double)g_ncell);

    if (iter%100 == 0)
      op_printf(" %d  %10.5e \n",iter,rms);
  }

  op_timers(&cpu_t2, &wall_t2);
  // per-kernel timing summary, then total wall time of the main loop
  op_timing_output();
  op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1);
  op_exit();
}
示例#15
0
/*
 * Reduction test driver.
 *
 * Reads the mesh from "new_grid.dat" (the header on every rank, the arrays on
 * the root rank only), hands each rank its local share through the
 * scatter_*_array helpers, declares the OP2 sets/map/dat needed for the test
 * and then runs two parallel loops whose only checked result is a global
 * OP_INC counter:
 *   - res_calc over edges (p_res accessed through pecell), expected g_nedge;
 *   - update over cells (p_res accessed directly), expected g_ncell.
 * PASSED/FAILED is printed for each of the two reductions.
 */
int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 2);

  // MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  int *becell, *ecell, *bound, *bedge, *edge, *cell;
  double *x, *q, *qold, *adt, *res;

  int nnode, ncell, nedge, nbedge;

  /**------------------------BEGIN I/O and PARTITIONING -------------------**/

  op_timers(&cpu_t1, &wall_t1);

  /* every rank opens the grid file so that every rank knows the global sizes */
  FILE *fp;

  if ((fp = fopen("new_grid.dat", "r")) == NULL) {
    op_printf("can't open file new_grid.dat\n");
    exit(-1);
  }

  int g_nnode, g_ncell, g_nedge, g_nbedge;

  check_scan(
      fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  // global arrays stay NULL on every rank except the root
  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0,
      *g_cell = 0;
  double *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n",
            g_nnode, g_ncell, g_nedge, g_nbedge);

  /* read in the rest of the grid from disk on the root processor */
  if (my_rank == MPI_ROOT) {
    g_cell = (int *)malloc(4 * g_ncell * sizeof(int));
    g_edge = (int *)malloc(2 * g_nedge * sizeof(int));
    g_ecell = (int *)malloc(2 * g_nedge * sizeof(int));
    g_bedge = (int *)malloc(2 * g_nbedge * sizeof(int));
    g_becell = (int *)malloc(g_nbedge * sizeof(int));
    g_bound = (int *)malloc(g_nbedge * sizeof(int));

    g_x = (double *)malloc(2 * g_nnode * sizeof(double));

    // The flow-field arrays are never read from the grid file.  Allocate them
    // zero-initialised so the scatter below ships defined values instead of
    // indeterminate heap contents.
    g_q = (double *)calloc(4 * g_ncell, sizeof(double));
    g_qold = (double *)calloc(4 * g_ncell, sizeof(double));
    g_res = (double *)calloc(4 * g_ncell, sizeof(double));
    g_adt = (double *)calloc(g_ncell, sizeof(double));

    // node coordinates: two doubles per node
    for (int n = 0; n < g_nnode; n++) {
      check_scan(fscanf(fp, "%lf %lf \n", &g_x[2 * n], &g_x[2 * n + 1]), 2);
    }

    // cell -> node connectivity: four node indices per cell
    for (int n = 0; n < g_ncell; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n],
                        &g_cell[4 * n + 1], &g_cell[4 * n + 2],
                        &g_cell[4 * n + 3]),
                 4);
    }

    // each edge line carries two node indices followed by two cell indices
    for (int n = 0; n < g_nedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2 * n],
                        &g_edge[2 * n + 1], &g_ecell[2 * n],
                        &g_ecell[2 * n + 1]),
                 4);
    }

    // each boundary-edge line: two node indices, one cell index, one flag
    for (int n = 0; n < g_nbedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2 * n],
                        &g_bedge[2 * n + 1], &g_becell[n], &g_bound[n]),
                 4);
    }
  }

  fclose(fp);

  // per-rank element counts derived from the global counts
  nnode = compute_local_size(g_nnode, comm_size, my_rank);
  ncell = compute_local_size(g_ncell, comm_size, my_rank);
  nedge = compute_local_size(g_nedge, comm_size, my_rank);
  nbedge = compute_local_size(g_nbedge, comm_size, my_rank);

  op_printf(
      "Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n",
      my_rank, nnode, ncell, nedge, nbedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  cell = (int *)malloc(4 * ncell * sizeof(int));
  edge = (int *)malloc(2 * nedge * sizeof(int));
  ecell = (int *)malloc(2 * nedge * sizeof(int));
  bedge = (int *)malloc(2 * nbedge * sizeof(int));
  becell = (int *)malloc(nbedge * sizeof(int));
  bound = (int *)malloc(nbedge * sizeof(int));

  x = (double *)malloc(2 * nnode * sizeof(double));
  q = (double *)malloc(4 * ncell * sizeof(double));
  qold = (double *)malloc(4 * ncell * sizeof(double));
  res = (double *)malloc(4 * ncell * sizeof(double));
  adt = (double *)malloc(ncell * sizeof(double));

  /* scatter sets, mappings and data on sets*/
  scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4);
  scatter_int_array(g_edge, edge, comm_size, g_nedge, nedge, 2);
  scatter_int_array(g_ecell, ecell, comm_size, g_nedge, nedge, 2);
  scatter_int_array(g_bedge, bedge, comm_size, g_nbedge, nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1);
  scatter_int_array(g_bound, bound, comm_size, g_nbedge, nbedge, 1);

  scatter_double_array(g_x, x, comm_size, g_nnode, nnode, 2);
  scatter_double_array(g_q, q, comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_qold, qold, comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_res, res, comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_adt, adt, comm_size, g_ncell, ncell, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if (my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x);
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_printf("Max total file read time = %f\n", wall_t2 - wall_t1);

  /**------------------------END I/O and PARTITIONING -----------------------**/

  // only the edge/cell sets, their map and p_res are needed by the two loops
  op_set edges = op_decl_set(nedge, "edges");
  op_set cells = op_decl_set(ncell, "cells");

  op_map pecell = op_decl_map(edges, cells, 2, ecell, "pecell");
  op_dat p_res = op_decl_dat(cells, 4, "double", res, "p_res");

  int count;

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", cells, pecell, NULL);

  op_diagnostic_output();

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // indirect reduction: the counter is compared against the global edge count
  count = 0;
  op_par_loop_res_calc("res_calc", edges,
                       op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC),
                       op_arg_gbl(&count, 1, "int", OP_INC));
  op_printf("number of edges:: %d should be: %d \n", count, g_nedge);
  if (count != g_nedge)
    op_printf("indirect reduction FAILED\n");
  else
    op_printf("indirect reduction PASSED\n");

  // direct reduction: the counter is compared against the global cell count
  count = 0;
  op_par_loop_update("update", cells,
                     op_arg_dat(p_res, -1, OP_ID, 4, "double", OP_RW),
                     op_arg_gbl(&count, 1, "int", OP_INC));
  op_printf("number of cells: %d should be: %d \n", count, g_ncell);
  if (count != g_ncell)
    op_printf("direct reduction FAILED\n");
  else
    op_printf("direct reduction PASSED\n");

  op_timers(&cpu_t2, &wall_t2);

  op_timing_output();

  op_exit();

  // local arrays are released only after OP2 has shut down
  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
示例#16
0
int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 2);

  int niter;
  float rms;

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // set constants and initialise flow field and residual
  op_printf("initialising flow field \n");

  char file[] = "new_grid.h5";

  // declare sets, pointers, datasets and global constants

  op_set nodes = op_decl_set_hdf5(file, "nodes");
  op_set edges = op_decl_set_hdf5(file, "edges");
  op_set bedges = op_decl_set_hdf5(file, "bedges");
  op_set cells = op_decl_set_hdf5(file, "cells");

  op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge");
  op_map pecell = op_decl_map_hdf5(edges, cells, 2, file, "pecell");
  op_map pbedge = op_decl_map_hdf5(bedges, nodes, 2, file, "pbedge");
  op_map pbecell = op_decl_map_hdf5(bedges, cells, 1, file, "pbecell");
  op_map pcell = op_decl_map_hdf5(cells, nodes, 4, file, "pcell");

  op_dat p_bound = op_decl_dat_hdf5(bedges, 1, "int", file, "p_bound");
  op_dat p_x = op_decl_dat_hdf5(nodes, 2, "float", file, "p_x");
  op_dat p_q = op_decl_dat_hdf5(cells, 4, "float", file, "p_q");
  op_dat p_qold = op_decl_dat_hdf5(cells, 4, "float", file, "p_qold");
  op_dat p_adt = op_decl_dat_hdf5(cells, 1, "float", file, "p_adt");
  op_dat p_res = op_decl_dat_hdf5(cells, 4, "float", file, "p_res");

  op_get_const_hdf5("gam", 1, "float", (char *)&gam, "new_grid.h5");
  op_get_const_hdf5("gm1", 1, "float", (char *)&gm1, "new_grid.h5");
  op_get_const_hdf5("cfl", 1, "float", (char *)&cfl, "new_grid.h5");
  op_get_const_hdf5("eps", 1, "float", (char *)&eps, "new_grid.h5");
  op_get_const_hdf5("mach", 1, "float", (char *)&mach, "new_grid.h5");
  op_get_const_hdf5("alpha", 1, "float", (char *)&alpha, "new_grid.h5");
  op_get_const_hdf5("qinf", 4, "float", (char *)&qinf, "new_grid.h5");

  op_decl_const2("gam", 1, "float", &gam);
  op_decl_const2("gm1", 1, "float", &gm1);
  op_decl_const2("cfl", 1, "float", &cfl);
  op_decl_const2("eps", 1, "float", &eps);
  op_decl_const2("mach", 1, "float", &mach);
  op_decl_const2("alpha", 1, "float", &alpha);
  op_decl_const2("qinf", 4, "float", qinf);

  if (op_is_root())
    op_diagnostic_output();

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x);
  // op_partition("PARMETIS", "KWAY", edges, pecell, p_x);

  int g_ncell = op_get_size(cells);

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 1000;

  for (int iter = 1; iter <= niter; iter++) {

    //  save old flow solution

    op_par_loop_save_soln("save_soln", cells,
                          op_arg_dat(p_q, -1, OP_ID, 4, "float", OP_READ),
                          op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_WRITE));

    //  predictor/corrector update loop

    for (int k = 0; k < 2; k++) {

      //    calculate area/timstep

      op_par_loop_adt_calc("adt_calc", cells,
                           op_arg_dat(p_x, 0, pcell, 2, "float", OP_READ),
                           op_arg_dat(p_x, 1, pcell, 2, "float", OP_READ),
                           op_arg_dat(p_x, 2, pcell, 2, "float", OP_READ),
                           op_arg_dat(p_x, 3, pcell, 2, "float", OP_READ),
                           op_arg_dat(p_q, -1, OP_ID, 4, "float", OP_READ),
                           op_arg_dat(p_adt, -1, OP_ID, 1, "float", OP_WRITE));

      //    calculate flux residual

      op_par_loop_res_calc("res_calc", edges,
                           op_arg_dat(p_x, 0, pedge, 2, "float", OP_READ),
                           op_arg_dat(p_x, 1, pedge, 2, "float", OP_READ),
                           op_arg_dat(p_q, 0, pecell, 4, "float", OP_READ),
                           op_arg_dat(p_q, 1, pecell, 4, "float", OP_READ),
                           op_arg_dat(p_adt, 0, pecell, 1, "float", OP_READ),
                           op_arg_dat(p_adt, 1, pecell, 1, "float", OP_READ),
                           op_arg_dat(p_res, 0, pecell, 4, "float", OP_INC),
                           op_arg_dat(p_res, 1, pecell, 4, "float", OP_INC));

      op_par_loop_bres_calc("bres_calc", bedges,
                            op_arg_dat(p_x, 0, pbedge, 2, "float", OP_READ),
                            op_arg_dat(p_x, 1, pbedge, 2, "float", OP_READ),
                            op_arg_dat(p_q, 0, pbecell, 4, "float", OP_READ),
                            op_arg_dat(p_adt, 0, pbecell, 1, "float", OP_READ),
                            op_arg_dat(p_res, 0, pbecell, 4, "float", OP_INC),
                            op_arg_dat(p_bound, -1, OP_ID, 1, "int", OP_READ));

      //    update flow field

      rms = 0.0;

      op_par_loop_update("update", cells,
                         op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_READ),
                         op_arg_dat(p_q, -1, OP_ID, 4, "float", OP_WRITE),
                         op_arg_dat(p_res, -1, OP_ID, 4, "float", OP_RW),
                         op_arg_dat(p_adt, -1, OP_ID, 1, "float", OP_READ),
                         op_arg_gbl(&rms, 1, "float", OP_INC));
    }

    //  print iteration history

    rms = sqrtf(rms / (float)g_ncell);

    if (iter % 100 == 0)
      op_printf(" %d  %10.5e \n", iter, rms);
    if (iter % 1000 == 0 &&
        g_ncell == 720000) { // defailt mesh -- for validation testing
      op_printf(" %d  %3.16f \n", iter, rms);
      float diff = fabsf((100.0 * (rms / 0.000105987)) - 100.0);
      op_printf("\n\nTest problem with %d cells is within %3.15E %% of the "
                "expected solution\n",
                720000, diff);
      if (diff < 0.1) {
        op_printf("This test is considered PASSED\n");
      } else {
        op_printf("This test is considered FAILED\n");
      }
    }
  }

  op_timers(&cpu_t2, &wall_t2);

  op_timing_output();
  op_printf("Max total runtime = %f\n", wall_t2 - wall_t1);
  op_exit();
}
示例#17
0
/*
 * Finite-element driver operating on "FE_grid.h5".
 *
 * Sets up the quadrature/shape-function constants, loads sets, maps and dats
 * from the HDF5 file, partitions the mesh and then performs `niter` outer
 * iterations.  Each outer iteration evaluates the residual (res_calc), runs
 * an inner iterative solve (the kernel names init_cg/spMV/dotPV/updateUR/
 * dotR/updateP suggest conjugate gradients) until the residual norm has
 * dropped below 10% of its initial value or 200 inner iterations have been
 * done, and finally applies the update and prints the RMS value together
 * with the number of inner iterations used.
 */
int main(int argc, char **argv)
{
  // OP initialisation

  op_init(argc,argv,2);

  int    nnode,ncell,nbnodes,niter;
  double  rms = 1;

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  double gam = 1.4;
  gm1 = gam - 1.0;
  gm1i = 1.0/gm1;

  // 1D quadrature weights/points and shape functions
  wtg1[0] = 0.5;
  wtg1[1] = 0.5;
  xi1[0] = 0.211324865405187;
  xi1[1] = 0.788675134594813;
  Ng1[0] = 0.788675134594813;
  Ng1[1] = 0.211324865405187;
  Ng1[2] = 0.211324865405187;
  Ng1[3] = 0.788675134594813;
  Ng1_xi[0] = -1;
  Ng1_xi[1] = -1;
  Ng1_xi[2] = 1;
  Ng1_xi[3] = 1;

  // 2D quadrature weights, shape functions and their derivatives
  wtg2[0] = 0.25;
  wtg2[1] = 0.25;
  wtg2[2] = 0.25;
  wtg2[3] = 0.25;
  Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520;
  Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667;
  Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667;
  Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146;
  Ng2_xi[0] = -0.788675134594813;  Ng2_xi[1] = 0.788675134594813;  Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187;
  Ng2_xi[4] = -0.788675134594813;  Ng2_xi[5] = 0.788675134594813;  Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187;
  Ng2_xi[8] = -0.211324865405187;  Ng2_xi[9] = 0.211324865405187;  Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813;
  Ng2_xi[12] = -0.211324865405187;  Ng2_xi[13] = 0.211324865405187;  Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813;
  Ng2_xi[16] = -0.788675134594813;  Ng2_xi[17] = -0.211324865405187;  Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187;
  Ng2_xi[20] = -0.211324865405187;  Ng2_xi[21] = -0.788675134594813;  Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813;
  Ng2_xi[24] = -0.788675134594813;  Ng2_xi[25] = -0.211324865405187;  Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187;
  Ng2_xi[28] = -0.211324865405187;  Ng2_xi[29] = -0.788675134594813;  Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813;

  minf = 0.1;
  m2 = minf*minf;
  freq = 1;
  kappa = 1;
  nmode = 0;

  mfan = 1.0;

  char file[] = "FE_grid.h5";


  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set_hdf5(file,  "nodes");
  op_set bnodes = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file,  "cells");

  op_map pbnodes = op_decl_map_hdf5(bnodes,nodes,1,file, "pbedge");
  op_map pcell   = op_decl_map_hdf5(cells, nodes,4,file,  "pcell");

  op_dat p_xm    = op_decl_dat_hdf5(nodes ,2,"double",  file, "p_x");
  op_dat p_phim  = op_decl_dat_hdf5(nodes, 1, "double", file, "p_phim");
  op_dat p_resm  = op_decl_dat_hdf5(nodes, 1, "double", file, "p_resm");
  op_dat p_K     = op_decl_dat_hdf5(cells, 16, "double:soa",file, "p_K");
  op_dat p_V     = op_decl_dat_hdf5(nodes, 1, "double", file, "p_V");
  op_dat p_P     = op_decl_dat_hdf5(nodes, 1, "double", file, "p_P");
  op_dat p_U     = op_decl_dat_hdf5(nodes, 1, "double", file, "p_U");

  op_decl_const2("gam",1,"double",&gam  );
  op_decl_const2("gm1",1,"double",&gm1  );
  op_decl_const2("gm1i",1,"double",&gm1i  );
  op_decl_const2("m2",1,"double",&m2  );
  op_decl_const2("wtg1",2,"double",wtg1  );
  op_decl_const2("xi1",2,"double",xi1  );
  op_decl_const2("Ng1",4,"double",Ng1  );
  op_decl_const2("Ng1_xi",4,"double",Ng1_xi  );
  op_decl_const2("wtg2",4,"double",wtg2  );
  op_decl_const2("Ng2",16,"double",Ng2  );
  op_decl_const2("Ng2_xi",32,"double",Ng2_xi  );
  op_decl_const2("minf",1,"double",&minf  );
  op_decl_const2("freq",1,"double",&freq  );
  op_decl_const2("kappa",1,"double",&kappa  );
  op_decl_const2("nmode",1,"double",&nmode  );
  op_decl_const2("mfan",1,"double",&mfan  );

  op_diagnostic_output();

  op_partition("PTSCOTCH", "KWAY", cells, pcell, p_xm);

  op_printf("nodes: %d cells: %d bnodes: %d\n", nodes->size, cells->size, bnodes->size);
  // only nnode is used below (RMS normalisation); the other two queries are
  // kept so the sequence of library calls is unchanged
  nnode = op_get_size(nodes);
  ncell = op_get_size(cells);
  nbnodes = op_get_size(bnodes);

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 20;

  for(int iter=1; iter<=niter; iter++) {

    op_par_loop_res_calc("res_calc",cells,
               op_arg_dat(p_xm,-4,pcell,2,"double",OP_READ),
               op_arg_dat(p_phim,-4,pcell,1,"double",OP_READ),
               op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_WRITE),
               op_arg_dat(p_resm,-4,pcell,1,"double",OP_INC));

    op_par_loop_dirichlet("dirichlet",bnodes,
               op_arg_dat(p_resm,0,pbnodes,1,"double",OP_WRITE));

    double c1 = 0;
    double c2 = 0;
    double c3 = 0;
    double alpha = 0;
    double beta = 0;

    //c1 = R'*R;
    op_par_loop_init_cg("init_cg",nodes,
               op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ),
               op_arg_gbl(&c1,1,"double",OP_INC),
               op_arg_dat(p_U,-1,OP_ID,1,"double",OP_WRITE),
               op_arg_dat(p_V,-1,OP_ID,1,"double",OP_WRITE),
               op_arg_dat(p_P,-1,OP_ID,1,"double",OP_WRITE));

    //set up stopping conditions
    double res0 = sqrt(c1);
    double res = res0;
    // Counter of the inner solve.  It has its own name so that it does not
    // redeclare the outer loop variable `iter` (shadowing in C, ill-formed in
    // C++ when done in the outermost block of the for body).
    int inner_iter = 0;
    int maxiter = 200;
    while (res > 0.1*res0 && inner_iter < maxiter) {
      //V = Stiffness*P
      op_par_loop_spMV("spMV",cells,
                 op_arg_dat(p_V,-4,pcell,1,"double",OP_INC),
                 op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_READ),
                 op_arg_dat(p_P,-4,pcell,1,"double",OP_READ));

      op_par_loop_dirichlet("dirichlet",bnodes,
                 op_arg_dat(p_V,0,pbnodes,1,"double",OP_WRITE));

      c2 = 0;

      //c2 = P'*V;
      op_par_loop_dotPV("dotPV",nodes,
                 op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ),
                 op_arg_dat(p_V,-1,OP_ID,1,"double",OP_READ),
                 op_arg_gbl(&c2,1,"double",OP_INC));

      alpha = c1/c2;

      //U = U + alpha*P;
      //resm = resm-alpha*V;
      op_par_loop_updateUR("updateUR",nodes,
                 op_arg_dat(p_U,-1,OP_ID,1,"double",OP_INC),
                 op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_INC),
                 op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ),
                 op_arg_dat(p_V,-1,OP_ID,1,"double",OP_RW),
                 op_arg_gbl(&alpha,1,"double",OP_READ));

      c3 = 0;

      //c3 = resm'*resm;
      op_par_loop_dotR("dotR",nodes,
                 op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ),
                 op_arg_gbl(&c3,1,"double",OP_INC));
      beta = c3/c1;
      //P = beta*P+resm;
      op_par_loop_updateP("updateP",nodes,
                 op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ),
                 op_arg_dat(p_P,-1,OP_ID,1,"double",OP_RW),
                 op_arg_gbl(&beta,1,"double",OP_READ));
      c1 = c3;
      res = sqrt(c1);
      inner_iter++;
    }
    rms = 0;
    //phim = phim - Stiffness\Load;
    op_par_loop_update("update",nodes,
               op_arg_dat(p_phim,-1,OP_ID,1,"double",OP_RW),
               op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_WRITE),
               op_arg_dat(p_U,-1,OP_ID,1,"double",OP_READ),
               op_arg_gbl(&rms,1,"double",OP_INC));
    // the count printed here is the number of inner-solve iterations
    op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(nnode), inner_iter);
  }

  // take the end time stamp before printing the timing table so that the
  // reported runtime does not include the cost of op_timing_output()
  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();
}
示例#18
0
/*
 * Double-precision airfoil driver with HDF5 input and write-back.
 *
 * Loads the mesh and the flow constants from "new_grid.h5", immediately
 * writes them out again to "new_grid_out.h5" (so the two files can be
 * compared with h5diff), partitions the mesh and runs the time-marching
 * loop, printing the RMS value every 100 iterations.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  // time stamps for the total execution wall time
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  double rms;
  int    num_steps;

  op_printf("initialising flow field \n");

  char file[] = "new_grid.h5";

  // sets
  op_set nodes  = op_decl_set_hdf5(file, "nodes");
  op_set edges  = op_decl_set_hdf5(file, "edges");
  op_set bedges = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file, "cells");

  // mappings
  op_map pedge   = op_decl_map_hdf5(edges,  nodes, 2, file, "pedge");
  op_map pecell  = op_decl_map_hdf5(edges,  cells, 2, file, "pecell");
  op_map pbedge  = op_decl_map_hdf5(bedges, nodes, 2, file, "pbedge");
  op_map pbecell = op_decl_map_hdf5(bedges, cells, 1, file, "pbecell");
  op_map pcell   = op_decl_map_hdf5(cells,  nodes, 4, file, "pcell");

  // datasets
  op_dat p_bound = op_decl_dat_hdf5(bedges, 1, "int",    file, "p_bound");
  op_dat p_x     = op_decl_dat_hdf5(nodes,  2, "double", file, "p_x");
  op_dat p_q     = op_decl_dat_hdf5(cells,  4, "double", file, "p_q");
  op_dat p_qold  = op_decl_dat_hdf5(cells,  4, "double", file, "p_qold");
  op_dat p_adt   = op_decl_dat_hdf5(cells,  1, "double", file, "p_adt");
  op_dat p_res   = op_decl_dat_hdf5(cells,  4, "double", file, "p_res");

  // global constants: read from the input file ...
  op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5");
  op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5");
  op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5");
  op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5");
  op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5");
  op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5");
  op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5");

  // ... and declared to OP2
  op_decl_const(1, "double", &gam);
  op_decl_const(1, "double", &gm1);
  op_decl_const(1, "double", &cfl);
  op_decl_const(1, "double", &eps);
  op_decl_const(1, "double", &mach);
  op_decl_const(1, "double", &alpha);
  op_decl_const(4, "double", qinf);

  op_diagnostic_output();

  // Round-trip check: dump what was just read; run h5diff on
  // new_grid_out.h5 and new_grid.h5 to confirm the file was read correctly.
  op_write_hdf5("new_grid_out.h5");

  op_write_const_hdf5("gam",1,"double",(char *)&gam,  "new_grid_out.h5");
  op_write_const_hdf5("gm1",1,"double",(char *)&gm1,  "new_grid_out.h5");
  op_write_const_hdf5("cfl",1,"double",(char *)&cfl,  "new_grid_out.h5");
  op_write_const_hdf5("eps",1,"double",(char *)&eps,  "new_grid_out.h5");
  op_write_const_hdf5("mach",1,"double",(char *)&mach,  "new_grid_out.h5");
  op_write_const_hdf5("alpha",1,"double",(char *)&alpha,  "new_grid_out.h5");
  op_write_const_hdf5("qinf",4,"double",(char *)qinf,  "new_grid_out.h5");

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x);

  int g_ncell = op_get_size(cells);

  // start of the timed region
  op_timers(&cpu_t1, &wall_t1);

  num_steps = 1000;

  // main time-marching loop
  for (int step = 1; step <= num_steps; ++step) {

    // keep a copy of the current solution
    op_par_loop(save_soln, "save_soln", cells,
                op_arg_dat(p_q,    -1, OP_ID, 4, "double", OP_READ),
                op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_WRITE));

    // two predictor/corrector stages per time step
    for (int stage = 0; stage != 2; ++stage) {

      // area/timestep
      op_par_loop(adt_calc, "adt_calc", cells,
                  op_arg_dat(p_x,   -4, pcell, 2, "double", OP_READ),
                  op_arg_dat(p_q,   -1, OP_ID, 4, "double", OP_READ),
                  op_arg_dat(p_adt, -1, OP_ID, 1, "double", OP_WRITE));

      // flux residual over interior edges
      op_par_loop(res_calc, "res_calc", edges,
                  op_arg_dat(p_x,   -2, pedge,  2, "double", OP_READ),
                  op_arg_dat(p_q,   -2, pecell, 4, "double", OP_READ),
                  op_arg_dat(p_adt, -2, pecell, 1, "double", OP_READ),
                  op_arg_dat(p_res, -2, pecell, 4, "double", OP_INC));

      // flux residual over boundary edges
      op_par_loop(bres_calc, "bres_calc", bedges,
                  op_arg_dat(p_x,     -2, pbedge,  2, "double", OP_READ),
                  op_arg_dat(p_q,      0, pbecell, 4, "double", OP_READ),
                  op_arg_dat(p_adt,    0, pbecell, 1, "double", OP_READ),
                  op_arg_dat(p_res,    0, pbecell, 4, "double", OP_INC),
                  op_arg_dat(p_bound, -1, OP_ID,   1, "int",    OP_READ));

      // flow-field update; rms is reset first, so only the last stage's
      // accumulated value survives to the report below
      rms = 0.0;

      op_par_loop(update, "update", cells,
                  op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_READ),
                  op_arg_dat(p_q,    -1, OP_ID, 4, "double", OP_WRITE),
                  op_arg_dat(p_res,  -1, OP_ID, 4, "double", OP_RW),
                  op_arg_dat(p_adt,  -1, OP_ID, 1, "double", OP_READ),
                  op_arg_gbl(&rms, 1, "double", OP_INC));
    }

    // iteration history
    rms = sqrt(rms/(double)g_ncell);

    if (step % 100 == 0) {
      op_printf(" %d  %10.5e \n", step, rms);
    }
  }

  // end of the timed region
  op_timers(&cpu_t2, &wall_t2);

  op_timing_output();
  op_printf("Max total runtime = \n%f\n", wall_t2 - wall_t1);
  op_exit();
}
示例#19
0
//
// Airfoil driver with user-managed MPI: the mesh is read in parallel from
// HDF5, partitioned with PT-Scotch, halos are created explicitly and the
// slowest rank's timings are reported by the root.
//
int main(int argc, char **argv)
{
    int my_rank;
    int comm_size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

    // time stamps, this rank's elapsed time and the maximum over all ranks
    double cpu_t1, cpu_t2, wall_t1, wall_t2;
    double elapsed;
    double slowest;

    int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
    double  *x, *q, *qold, *adt, *res;

    int    num_steps;
    double  rms;

    op_timers(&cpu_t1, &wall_t1);

    // set constants
    if (my_rank == MPI_ROOT) {
        printf("initialising flow field\n");
    }
    gam = 1.4f;
    gm1 = gam - 1.0f;
    cfl = 0.9f;
    eps = 0.05f;

    double mach  = 0.4f;
    double alpha = 3.0f*atan(1.0f)/45.0f;
    double pres  = 1.0f;
    double rho   = 1.0f;
    double vel   = sqrt(gam*pres/rho)*mach;
    double ener  = pres/(rho*gm1) + 0.5f*vel*vel;

    // far-field state
    qinf[0] = rho;
    qinf[1] = rho*vel;
    qinf[2] = 0.0f;
    qinf[3] = rho*ener;

    // OP initialisation
    op_init(argc, argv, 2);

    /**------------------------BEGIN Parallel I/O -------------------**/

    char file[] = "new_grid.h5"; // alternative input: "new_grid-26mil.h5"

    // sets, mappings and datasets are all read in from the file
    op_set nodes  = op_decl_set_hdf5(file, "nodes");
    op_set edges  = op_decl_set_hdf5(file, "edges");
    op_set bedges = op_decl_set_hdf5(file, "bedges");
    op_set cells  = op_decl_set_hdf5(file, "cells");

    op_map pedge   = op_decl_map_hdf5(edges,  nodes, 2, file, "pedge");
    op_map pecell  = op_decl_map_hdf5(edges,  cells, 2, file, "pecell");
    op_map pbedge  = op_decl_map_hdf5(bedges, nodes, 2, file, "pbedge");
    op_map pbecell = op_decl_map_hdf5(bedges, cells, 1, file, "pbecell");
    op_map pcell   = op_decl_map_hdf5(cells,  nodes, 4, file, "pcell");

    op_dat p_bound = op_decl_dat_hdf5(bedges, 1, "int",    file, "p_bound");
    op_dat p_x     = op_decl_dat_hdf5(nodes,  2, "double", file, "p_x");
    op_dat p_q     = op_decl_dat_hdf5(cells,  4, "double", file, "p_q");
    op_dat p_qold  = op_decl_dat_hdf5(cells,  4, "double", file, "p_qold");
    op_dat p_adt   = op_decl_dat_hdf5(cells,  1, "double", file, "p_adt");
    op_dat p_res   = op_decl_dat_hdf5(cells,  4, "double", file, "p_res");

    /**------------------------END Parallel I/O  -----------------------**/

    // report the longest file-read time over all ranks
    op_timers(&cpu_t2, &wall_t2);
    elapsed = wall_t2 - wall_t1;
    MPI_Reduce(&elapsed, &slowest, 1, MPI_DOUBLE, MPI_MAX, MPI_ROOT, MPI_COMM_WORLD);
    if (my_rank == MPI_ROOT) {
        printf("Max total file read time = %f\n", slowest);
    }

    op_decl_const(1, "double", &gam);
    op_decl_const(1, "double", &gm1);
    op_decl_const(1, "double", &cfl);
    op_decl_const(1, "double", &eps);
    op_decl_const(1, "double", &mach);
    op_decl_const(1, "double", &alpha);
    op_decl_const(4, "double", qinf);

    op_diagnostic_output();

    // Round-trip check: write back what was read; run h5diff on
    // new_grid_out.h5 and new_grid.h5 to confirm the file was read correctly.
    op_write_hdf5("new_grid_out.h5");

    // Partitioning is done with PT-Scotch.  ParMetis-based alternatives that
    // were tried here: op_partition_geom(p_x), op_partition_random(cells),
    // op_partition_kway(pecell), op_partition_geomkway(p_x, pcell).
    op_partition_ptscotch(pecell);

    // create halos
    op_halo_create();

    // global cell count = sum of the per-rank set sizes
    int g_ncell = 0;
    int *cells_per_rank = (int *)malloc(sizeof(int)*comm_size);
    MPI_Allgather(&cells->size, 1, MPI_INT, cells_per_rank, 1, MPI_INT, MPI_COMM_WORLD);
    for (int rank = 0; rank < comm_size; rank++) {
        g_ncell += cells_per_rank[rank];
    }
    free(cells_per_rank);

    // start of the timed region
    op_timers(&cpu_t1, &wall_t1);

    num_steps = 1000;
    for (int step = 1; step <= num_steps; ++step) {

        // keep a copy of the current solution
        op_par_loop(save_soln, "save_soln", cells,
                    op_arg_dat(p_q,    -1, OP_ID, 4, "double", OP_READ),
                    op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_WRITE));

        // two predictor/corrector stages per time step
        for (int stage = 0; stage != 2; ++stage) {

            // area/timestep
            op_par_loop(adt_calc, "adt_calc", cells,
                        op_arg_dat(p_x,    0, pcell, 2, "double", OP_READ),
                        op_arg_dat(p_x,    1, pcell, 2, "double", OP_READ),
                        op_arg_dat(p_x,    2, pcell, 2, "double", OP_READ),
                        op_arg_dat(p_x,    3, pcell, 2, "double", OP_READ),
                        op_arg_dat(p_q,   -1, OP_ID, 4, "double", OP_READ),
                        op_arg_dat(p_adt, -1, OP_ID, 1, "double", OP_WRITE));

            // flux residual over interior edges
            op_par_loop(res_calc, "res_calc", edges,
                        op_arg_dat(p_x,   0, pedge,  2, "double", OP_READ),
                        op_arg_dat(p_x,   1, pedge,  2, "double", OP_READ),
                        op_arg_dat(p_q,   0, pecell, 4, "double", OP_READ),
                        op_arg_dat(p_q,   1, pecell, 4, "double", OP_READ),
                        op_arg_dat(p_adt, 0, pecell, 1, "double", OP_READ),
                        op_arg_dat(p_adt, 1, pecell, 1, "double", OP_READ),
                        op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC),
                        op_arg_dat(p_res, 1, pecell, 4, "double", OP_INC));

            // flux residual over boundary edges
            op_par_loop(bres_calc, "bres_calc", bedges,
                        op_arg_dat(p_x,      0, pbedge,  2, "double", OP_READ),
                        op_arg_dat(p_x,      1, pbedge,  2, "double", OP_READ),
                        op_arg_dat(p_q,      0, pbecell, 4, "double", OP_READ),
                        op_arg_dat(p_adt,    0, pbecell, 1, "double", OP_READ),
                        op_arg_dat(p_res,    0, pbecell, 4, "double", OP_INC),
                        op_arg_dat(p_bound, -1, OP_ID,   1, "int",    OP_READ));

            // flow-field update; rms is reset first, so only the last
            // stage's accumulated value survives to the report below
            rms = 0.0;

            op_par_loop(update, "update", cells,
                        op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_READ),
                        op_arg_dat(p_q,    -1, OP_ID, 4, "double", OP_WRITE),
                        op_arg_dat(p_res,  -1, OP_ID, 4, "double", OP_RW),
                        op_arg_dat(p_adt,  -1, OP_ID, 1, "double", OP_READ),
                        op_arg_gbl(&rms, 1, "double", OP_INC));
        }

        // iteration history: normalised and printed by the root only
        if (my_rank == MPI_ROOT) {
            rms = sqrt(rms/(double) g_ncell);
            if (step % 100 == 0) {
                printf("%d  %10.5e \n", step, rms);
            }
        }
    }

    // end of the timed region
    op_timers(&cpu_t2, &wall_t2);

    // get results data array
    // NOTE(review): the returned op_dat is never used or released here;
    // confirm against the OP2 API whether the caller is expected to free it.
    op_dat temp = op_mpi_get_data(p_q);

    // The result could be written out with op_write_hdf5("new_grid_out.h5")
    // and compressed afterwards with
    //   ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5

    // free memory allocated to halos
    op_halo_destroy();

    // return all op_dats, op_maps back to original element order
    op_partition_reverse();

    // print each mpi process's timing info for each kernel
    op_mpi_timing_output();

    // print the longest total time for the time-marching loop over all ranks
    elapsed = wall_t2 - wall_t1;
    MPI_Reduce(&elapsed, &slowest, 1, MPI_DOUBLE, MPI_MAX, MPI_ROOT, MPI_COMM_WORLD);
    if (my_rank == MPI_ROOT) {
        printf("Max total runtime = %f\n", slowest);
    }

    op_exit();
    MPI_Finalize();   // user mpi finalize
}
示例#20
0
/*
 * Host-side driver for the indirect "res" parallel loop (x86/OpenMP path).
 *
 * name  - loop name; used as the plan key and stored in the timing record
 * set   - set the loop iterates over
 * arg0  - accessed directly (inds entry -1)
 * arg1  - accessed through indirection 0
 * arg2  - accessed through indirection 1
 * arg3  - accessed directly (inds entry -1)
 *
 * The routine fetches an execution plan, runs op_x86_res for every block of
 * every colour of that plan, and accumulates timing/transfer statistics in
 * OP_kernels[0]. All data pointers are handed to the kernel as float*.
 */
void op_par_loop_res(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3 ){

  int    nargs   = 4;
  op_arg args[4] = {arg0,arg1,arg2,arg3};

  // two distinct indirect datasets; -1 marks a directly accessed argument
  int    ninds   = 2;
  int    inds[4] = {-1,0,1,-1};

  if (OP_diags>2) {
    printf(" kernel routine with indirection: res \n");
  }

  // get plan (block size can be pinned at compile time via OP_PART_SIZE_0)

  #ifdef OP_PART_SIZE_0
    int part_size = OP_PART_SIZE_0;
  #else
    int part_size = OP_part_size;
  #endif

  op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);

  // initialise timers

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // execute plan: colours are processed one after another, the blocks of a
  // single colour are distributed over the OpenMP threads

  int block_offset = 0;

  for (int col=0; col < Plan->ncolors; col++) {
    int nblocks = Plan->ncolblk[col];

#pragma omp parallel for
    for (int blockIdx=0; blockIdx<nblocks; blockIdx++) {
      op_x86_res( blockIdx,
        (float *)arg1.data, Plan->ind_maps[0],
        (float *)arg2.data, Plan->ind_maps[1],
        (float *)arg0.data,
        Plan->loc_maps[1],
        Plan->loc_maps[2],
        (float *)arg3.data,
        Plan->ind_sizes,
        Plan->ind_offs,
        block_offset,
        Plan->blkmap,
        Plan->offset,
        Plan->nelems,
        Plan->nthrcol,
        Plan->thrcol);
    }

    // blocks of the next colour start right after the ones just executed
    block_offset += nblocks;
  }

  // update kernel record

  op_timers(&cpu_t2, &wall_t2);
  op_timing_realloc(0);
  OP_kernels[0].name      = name;
  OP_kernels[0].count    += 1;
  OP_kernels[0].time     += wall_t2 - wall_t1;
  OP_kernels[0].transfer  += Plan->transfer;
  OP_kernels[0].transfer2 += Plan->transfer2;
}
示例#21
0
int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int *groups = (int *)malloc(size * sizeof(int));
  int *groups2 = (int *)malloc(size * sizeof(int));
  int my_type = 1; //This is to be read from a configuration file
  MPI_Allgather(&my_type, 1, MPI_INT, groups, 1, MPI_INT, MPI_COMM_WORLD);

  int num_groups = 0;
  for (int i = 0; i < size; i++) num_groups = num_groups > groups[i] ? num_groups : groups[i];
  num_groups++;

  //The global group
  MPI_Group global_grp;
  MPI_Comm_group(MPI_COMM_WORLD, &global_grp);

  //Create sub-groups and sub-communicators
  MPI_Group mpigroups[num_groups];
  MPI_Comm mpicomms[num_groups];
  int count = 0;
  for (int i = 0; i < num_groups; ++i) {
    count = 0;
    for (int j = 0; j < size; ++j) {
      if (groups[j] == i) {
        groups2[count++] = j;
      }
    }
    MPI_Group_incl(global_grp, count, groups2, &mpigroups[i]);
    MPI_Comm_create(MPI_COMM_WORLD, mpigroups[i], &mpicomms[i]);
  }

  //coupling procs
  for (int i = 0; i < 1; ++i) {
    count = 0;
    for (int j = 0; j < size; ++j) {
      if (groups[j] == i) {
        groups2[count++] = j;
      }
    }
  }

  // OP initialisation
  op_mpi_init(argc,argv,2,MPI_COMM_WORLD, mpicomms[1]);

  int    niter;
  double  rms;

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // set constants and initialise flow field and residual
  op_printf("initialising flow field \n");

  char file[] = "new_grid.h5";

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set_hdf5(file, "nodes");
  op_set edges  = op_decl_set_hdf5(file,  "edges");
  op_set bedges = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file,  "cells");

  op_map pedge   = op_decl_map_hdf5(edges, nodes, 2, file, "pedge");
  op_map pecell  = op_decl_map_hdf5(edges, cells,2, file, "pecell");
  op_map pbedge  = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge");
  op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell");
  op_map pcell   = op_decl_map_hdf5(cells, nodes,4, file, "pcell");
  op_map pbndbnd   = op_decl_map_hdf5(bedges, bedges,1, file, "pbndbnd");

  op_map m_test  = op_decl_map_hdf5(cells, nodes,4, file, "m_test");
  if (m_test == NULL) printf("m_test not found\n");

  op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int"  ,file,"p_bound");
  op_dat p_x     = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x");
  op_dat p_q     = op_decl_dat_hdf5(cells ,4,"double",file,"p_q");
  op_dat p_qold  = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold");
  op_dat p_adt   = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt");
  op_dat p_res   = op_decl_dat_hdf5(cells ,4,"double",file,"p_res");

  op_dat p_test  = op_decl_dat_hdf5(cells ,4,"double",file,"p_test");
  if (p_test == NULL) printf("p_test not found\n");

  op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5");
  op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5");
  op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5");
  op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5");
  op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5");
  op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5");
  op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5");

  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&cfl  );
  op_decl_const(1,"double",&eps  );
  op_decl_const(1,"double",&mach );
  op_decl_const(1,"double",&alpha);
  op_decl_const(4,"double",qinf  );

  op_diagnostic_output();

  //write back original data just to compare you read the file correctly
  //do an h5diff between new_grid_out.h5 and new_grid.h5 to
  //compare two hdf5 files
  op_dump_to_hdf5("new_grid_out.h5");

  op_write_const_hdf5("gam",1,"double",(char *)&gam,  "new_grid_out.h5");
  op_write_const_hdf5("gm1",1,"double",(char *)&gm1,  "new_grid_out.h5");
  op_write_const_hdf5("cfl",1,"double",(char *)&cfl,  "new_grid_out.h5");
  op_write_const_hdf5("eps",1,"double",(char *)&eps,  "new_grid_out.h5");
  op_write_const_hdf5("mach",1,"double",(char *)&mach,  "new_grid_out.h5");
  op_write_const_hdf5("alpha",1,"double",(char *)&alpha,  "new_grid_out.h5");
  op_write_const_hdf5("qinf",4,"double",(char *)qinf,  "new_grid_out.h5");

  //trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x);
  //op_partition("PARMETIS", "KWAY", edges, pecell, p_x);

  int g_ncell = op_get_size(cells);

  //create some temporaries so we can exchange data defined on the boundary
  double *ptr = NULL;
  op_dat center = op_decl_dat_temp(bedges, 3, "double", ptr, "center");
  op_dat pres = op_decl_dat_temp(bedges, 1, "double", ptr, "pres");

  int *ptr2 = NULL;
  op_dat p_bound2 = op_decl_dat_temp(bedges, 1, "int", ptr2, "p_bound2");
  op_dat center2 = op_decl_dat_temp(bedges, 3, "double", ptr, "center2");
  op_dat pres2 = op_decl_dat_temp(bedges, 1, "double", ptr, "pres2");

  //create import and export handles
  op_export_handle handle = op_export_init(count, groups2, pbndbnd);
  op_import_handle handle2 = op_import_init(count, groups2, center);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

    //  save old flow solution

    op_par_loop(save_soln,"save_soln", cells,
        op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_READ ),
        op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE));

    //  predictor/corrector update loop

    for(int k=0; k<2; k++) {

      //    calculate area/timstep

      op_par_loop(adt_calc,"adt_calc",cells,
          op_arg_dat(p_x,   0,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   1,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   2,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   3,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_q,  -1,OP_ID, 4,"double",OP_READ ),
          op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE));

      //    calculate flux residual

      op_par_loop(res_calc,"res_calc",edges,
          op_arg_dat(p_x,    0,pedge, 2,"double",OP_READ),
          op_arg_dat(p_x,    1,pedge, 2,"double",OP_READ),
          op_arg_dat(p_q,    0,pecell,4,"double",OP_READ),
          op_arg_dat(p_q,    1,pecell,4,"double",OP_READ),
          op_arg_dat(p_adt,  0,pecell,1,"double",OP_READ),
          op_arg_dat(p_adt,  1,pecell,1,"double",OP_READ),
          op_arg_dat(p_res,  0,pecell,4,"double",OP_INC ),
          op_arg_dat(p_res,  1,pecell,4,"double",OP_INC ));

      op_par_loop(bres_calc,"bres_calc",bedges,
          op_arg_dat(p_x,     0,pbedge, 2,"double",OP_READ),
          op_arg_dat(p_x,     1,pbedge, 2,"double",OP_READ),
          op_arg_dat(p_q,     0,pbecell,4,"double",OP_READ),
          op_arg_dat(p_adt,   0,pbecell,1,"double",OP_READ),
          op_arg_dat(p_res,   0,pbecell,4,"double",OP_INC ),
          op_arg_dat(p_bound,-1,OP_ID  ,1,"int",  OP_READ),
          op_arg_dat(center, -1, OP_ID, 3, "double", OP_WRITE),
          op_arg_dat(pres, -1, OP_ID, 1, "double", OP_WRITE));

      //    update flow field

      rms = 0.0;

      op_par_loop(update,"update",cells,
          op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ),
          op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_WRITE),
          op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW   ),
          op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ),
          op_arg_gbl(&rms,1,"double",OP_INC));
    }

    //  print iteration history

    rms = sqrt(rms/(double)g_ncell);

    if (iter%100 == 0) {
      op_printf(" %d  %10.5e \n",iter,rms);
      //Export data
      op_dat arr[] = {p_bound, center, pres};
      op_export_data(handle, 3, arr);
      //Import data
      op_dat arr2[] = {p_bound2, center2, pres2};
      op_import_data(handle2, 3, arr2);
      //check whether the two are the same
      op_par_loop(comparethem, "comparethem", bedges,
          op_arg_dat(p_bound,-1, OP_ID, 1, "int", OP_READ),
          op_arg_dat(p_bound2,-1, OP_ID, 1, "int", OP_READ),
          op_arg_dat(center,-1, OP_ID, 3, "double", OP_READ),
          op_arg_dat(center2,-1, OP_ID, 3, "double", OP_READ),
          op_arg_dat(pres,-1, OP_ID, 1, "double", OP_READ),
          op_arg_dat(pres2,-1, OP_ID, 1, "double", OP_READ));
    }
  }

  op_timers(&cpu_t2, &wall_t2);

  double* q = (double *)malloc(sizeof(double)*op_get_size(cells)*4);
  op_fetch_data_hdf5(p_q, q, 0, op_get_size(cells)-1);
  free(q);

  op_fetch_data_hdf5_file(p_q, "file_name.h5");

  //printf("Root process = %d\n",op_is_root());

  //output the result dat array to files
  //op_write_hdf5("new_grid_out.h5");

  //compress using
  // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5

  op_timing_output();
  op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1);
  op_exit();
}
示例#22
0
/*
 * Finite-element potential-flow driver.
 *
 * Reads the mesh from "FE_grid.dat" (header on every rank, body on the root
 * rank only), scatters it across the MPI ranks, declares the OP2 sets, maps,
 * dats and constants, partitions the mesh and then performs 20 outer
 * iterations. Each outer iteration assembles the element matrices and the
 * residual (res_calc), applies the Dirichlet condition on the boundary nodes
 * and runs an inner conjugate-gradient style loop (init_cg, spMV, dotPV,
 * updateUR, dotR, updateP) until the residual norm has dropped by a factor of
 * ten or 200 inner iterations have been done; finally phim is updated.
 */
int main(int argc, char **argv)
{
  // OP initialisation

  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // Local (per-rank) and global (root-only) mesh arrays. The global pointers
  // are only allocated on the root rank but are passed to the scatter helpers
  // on every rank, so they must hold a well-defined value everywhere.
  int    *bnode = NULL, *cell = NULL, *g_bnode = NULL, *g_cell = NULL;
  double  *xm = NULL, *g_xm = NULL;

  int    nnode,ncell,nbnodes,niter, g_nnode, g_ncell, g_nbnodes;
  double  rms = 1;

  // read in grid

  op_printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("FE_grid.dat","r")) == NULL) {
    op_printf("can't open file FE_grid.dat\n"); exit(-1);
  }

  // header: global node, cell and boundary-node counts (read on every rank)
  if (fscanf(fp,"%d %d %d \n",&g_nnode, &g_ncell, &g_nbnodes) != 3) {
    op_printf("error reading from FE_grid.dat\n"); exit(-1);
  }

  if (my_rank == MPI_ROOT) {
    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_bnode   = (int *) malloc(g_nbnodes*sizeof(int));
    g_xm      = (double *) malloc(2*g_nnode*sizeof(double));

    // node coordinates, two doubles per node
    for (int n=0; n<g_nnode; n++) {
      if (fscanf(fp,"%lf %lf \n",&g_xm[2*n], &g_xm[2*n+1]) != 2) {
        op_printf("error reading from FE_grid.dat\n"); exit(-1);
      }
    }

    // cell connectivity, four node indices per cell
    for (int n=0; n<g_ncell; n++) {
      if (fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
      &g_cell[4*n+2], &g_cell[4*n+3]) != 4) {
        op_printf("error reading from FE_grid.dat\n"); exit(-1);
      }
    }

    // boundary node indices
    for (int n=0; n<g_nbnodes; n++) {
      if (fscanf(fp,"%d \n",&g_bnode[n]) != 1) {
        op_printf("error reading from FE_grid.dat\n"); exit(-1);
      }
    }
  }
  fclose(fp);

  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  ncell = compute_local_size (g_ncell, comm_size, my_rank);
  nbnodes = compute_local_size (g_nbnodes, comm_size, my_rank);

  cell   = (int *) malloc(4*ncell*sizeof(int));
  bnode   = (int *) malloc(nbnodes*sizeof(int));
  xm      = (double *) malloc(2*nnode*sizeof(double));

  scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
  scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes,nbnodes, 1);
  scatter_double_array(g_xm, xm, comm_size, g_nnode,nnode, 2);

  if(my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_xm);
    free(g_bnode);
  }

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  double gam = 1.4;
  gm1 = gam - 1.0;
  gm1i = 1.0/gm1;

  // quadrature weights/points and shape-function tables (1D: *1, 2D: *2)
  wtg1[0] = 0.5;
  wtg1[1] = 0.5;
  xi1[0] = 0.211324865405187;
  xi1[1] = 0.788675134594813;
  Ng1[0] = 0.788675134594813;
  Ng1[1] = 0.211324865405187;
  Ng1[2] = 0.211324865405187;
  Ng1[3] = 0.788675134594813;
  Ng1_xi[0] = -1;
  Ng1_xi[1] = -1;
  Ng1_xi[2] = 1;
  Ng1_xi[3] = 1;
  wtg2[0] = 0.25;
  wtg2[1] = 0.25;
  wtg2[2] = 0.25;
  wtg2[3] = 0.25;
  Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520;
  Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667;
  Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667;
  Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146;
  Ng2_xi[0] = -0.788675134594813;  Ng2_xi[1] = 0.788675134594813;  Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187;
  Ng2_xi[4] = -0.788675134594813;  Ng2_xi[5] = 0.788675134594813;  Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187;
  Ng2_xi[8] = -0.211324865405187;  Ng2_xi[9] = 0.211324865405187;  Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813;
  Ng2_xi[12] = -0.211324865405187;  Ng2_xi[13] = 0.211324865405187;  Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813;
  Ng2_xi[16] = -0.788675134594813;  Ng2_xi[17] = -0.211324865405187;  Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187;
  Ng2_xi[20] = -0.211324865405187;  Ng2_xi[21] = -0.788675134594813;  Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813;
  Ng2_xi[24] = -0.788675134594813;  Ng2_xi[25] = -0.211324865405187;  Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187;
  Ng2_xi[28] = -0.211324865405187;  Ng2_xi[29] = -0.788675134594813;  Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813;

  minf = 0.1;
  m2 = minf*minf;
  freq = 1;
  kappa = 1;
  nmode = 0;

  mfan = 1.0;

  // initial potential: minf times the first coordinate of each node
  double *phim = (double *)malloc(nnode*sizeof(double));
  memset(phim,0,nnode*sizeof(double));
  for (int i = 0;i<nnode;i++) {
    phim[i] = minf*xm[2*i];
  }

  // per-cell 4x4 matrices and per-node work vectors, all zero-initialised
  double *K = (double *)malloc(4*4*ncell*sizeof(double));
  memset(K,0,4*4*ncell*sizeof(double));
  double *resm = (double *)malloc(nnode*sizeof(double));
  memset(resm,0,nnode*sizeof(double));

  double *V = (double *)malloc(nnode*sizeof(double));
  memset(V,0,nnode*sizeof(double));
  double *P = (double *)malloc(nnode*sizeof(double));
  memset(P,0,nnode*sizeof(double));
  double *U = (double *)malloc(nnode*sizeof(double));
  memset(U,0,nnode*sizeof(double));

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set bnodes = op_decl_set(nbnodes, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pbnodes  = op_decl_map(bnodes,nodes,1,bnode, "pbedge");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_xm     = op_decl_dat(nodes ,2,"double",xm    ,"p_x");
  op_dat p_phim  = op_decl_dat(nodes, 1, "double", phim, "p_phim");
  op_dat p_resm  = op_decl_dat(nodes, 1, "double", resm, "p_resm");
  op_dat p_K  = op_decl_dat(cells, 16, "double:soa", K, "p_K");

  op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V");
  op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P");
  op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U");

  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&gm1i  );
  op_decl_const(1,"double",&m2  );
  op_decl_const(2,"double",wtg1  );
  op_decl_const(2,"double",xi1  );
  op_decl_const(4,"double",Ng1  );
  op_decl_const(4,"double",Ng1_xi  );
  op_decl_const(4,"double",wtg2  );
  op_decl_const(16,"double",Ng2  );
  op_decl_const(32,"double",Ng2_xi  );
  op_decl_const(1,"double",&minf  );
  op_decl_const(1,"double",&freq  );
  op_decl_const(1,"double",&kappa  );
  op_decl_const(1,"double",&nmode  );
  op_decl_const(1,"double",&mfan  );

  op_diagnostic_output();

  op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL);

  // main time-marching loop

  niter = 20;
  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);
  for(int iter=1; iter<=niter; iter++) {

    // assemble element matrices (p_K) and accumulate the nodal residual
    op_par_loop(res_calc,"res_calc",cells,
                op_arg_dat(p_xm,    -4, pcell, 2,"double",OP_READ),
                op_arg_dat(p_phim,  -4, pcell, 1,"double",OP_READ),
                op_arg_dat(p_K,     -1,     OP_ID, 16,"double:soa",OP_WRITE),
                op_arg_dat(p_resm,  -4, pcell, 1,"double",OP_INC)
                );

    op_par_loop(dirichlet,"dirichlet",bnodes,
                op_arg_dat(p_resm,  0, pbnodes, 1,"double",OP_WRITE));

    double c1 = 0;
    double c2 = 0;
    double c3 = 0;
    double alpha = 0;
    double beta = 0;

    //c1 = R'*R;
    op_par_loop(init_cg, "init_cg", nodes,
                op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                op_arg_gbl(&c1, 1, "double", OP_INC),
                op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE),
                op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE),
                op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE));

    //set up stopping conditions: one order of magnitude reduction of the
    //residual norm, or at most maxiter inner iterations
    double res0 = sqrt(c1);
    double res = res0;
    int inner_iter = 0;
    int maxiter = 200;
    while (res > 0.1*res0 && inner_iter < maxiter) {
      //V = Stiffness*P
      op_par_loop(spMV, "spMV", cells,
                  op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC),
                  op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ),
                  op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ));

      op_par_loop(dirichlet,"dirichlet",bnodes,
                  op_arg_dat(p_V,  0, pbnodes, 1,"double",OP_WRITE));

      c2 = 0;

      //c2 = P'*V;
      op_par_loop(dotPV, "dotPV", nodes,
                  op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_gbl(&c2, 1, "double", OP_INC));

      alpha = c1/c2;

      //U = U + alpha*P;
      //resm = resm-alpha*V;
      op_par_loop(updateUR, "updateUR", nodes,
                  op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC),
                  op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC),
                  op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW),
                  op_arg_gbl(&alpha, 1, "double", OP_READ));

      c3 = 0;

      //c3 = resm'*resm;
      op_par_loop(dotR, "dotR", nodes,
                  op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_gbl(&c3, 1, "double", OP_INC));
      beta = c3/c1;
      //P = beta*P+resm;
      op_par_loop(updateP, "updateP", nodes,
                  op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW),
                  op_arg_gbl(&beta, 1, "double", OP_READ));
      c1 = c3;
      res = sqrt(c1);
      inner_iter++;
    }
    rms = 0;
    //phim = phim - Stiffness\Load;
    op_par_loop(update, "update", nodes,
                op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW),
                op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE),
                op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ),
                op_arg_gbl(&rms, 1, "double", OP_INC));
    op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(g_nnode), inner_iter);
  }
  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();

  // NOTE(review): the user arrays are intentionally not freed here; the
  // original kept these frees disabled, presumably because ownership passes
  // to OP2 once they are declared as maps/dats -- confirm before enabling.
  /*free(cell);
  free(bnode);
  free(xm);
  free(phim);
  free(K);
  free(resm);
  free(V);
  free(P);
  free(U);*/

  return 0;
}
示例#23
0
/*
 * Airfoil driver that keeps p_res, p_adt and p_qold as temporary op_dats.
 *
 * Steps:
 *   1. initialise OP2 and query the MPI rank / communicator size;
 *   2. read "new_grid.dat": the header on every rank, the mesh body on the
 *      root rank only, then scatter all arrays to the ranks;
 *   3. declare sets, maps, the persistent dats (p_bound, p_x, p_q) and the
 *      constants, then partition;
 *   4. run 1000 time steps; each step declares the three temporary dats,
 *      performs save_soln followed by two predictor/corrector sweeps
 *      (adt_calc, res_calc, bres_calc, update) and frees the temporaries;
 *   5. print timings, shut OP2 down and free the local arrays.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  //timer (the same four variables time the file read first and the
  //time-marching loop afterwards)
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // per-rank (local) mesh and data arrays, filled by the scatter calls below
  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  double  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  double  rms;

  /**------------------------BEGIN I/O and PARTITIONING -------------------**/

  op_timers(&cpu_t1, &wall_t1);

  /* read in grid from disk; the file is opened and its header parsed on
     every rank, the body is only parsed on the root rank further down */
  FILE *fp;

  if ( (fp = fopen("new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  // global element counts taken from the file header
  int   g_nnode,g_ncell,g_nedge,g_nbedge;

  check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  // global arrays stay null on every rank except the root
  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  double *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants

  // NOTE(review): the literals below carry an 'f' suffix although mach,
  // alpha, p, r, u and e are doubles here (and the constants are declared to
  // OP2 as "double" further down), so the values hold only single-precision
  // accuracy. Left as is: changing them alters the computed results.
  op_printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  // far-field state, also used as the initial value of every cell of g_q
  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n"
      ,g_nnode,g_ncell,g_nedge,g_nbedge);

  if(my_rank == MPI_ROOT) {
    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_edge   = (int *) malloc(2*g_nedge*sizeof(int));
    g_ecell  = (int *) malloc(2*g_nedge*sizeof(int));
    g_bedge  = (int *) malloc(2*g_nbedge*sizeof(int));
    g_becell = (int *) malloc(  g_nbedge*sizeof(int));
    g_bound  = (int *) malloc(  g_nbedge*sizeof(int));

    g_x      = (double *) malloc(2*g_nnode*sizeof(double));
    g_q      = (double *) malloc(4*g_ncell*sizeof(double));
    g_qold   = (double *) malloc(4*g_ncell*sizeof(double));
    g_res    = (double *) malloc(4*g_ncell*sizeof(double));
    g_adt    = (double *) malloc(  g_ncell*sizeof(double));

    // node coordinates: two doubles per node
    for (int n=0; n<g_nnode; n++){
      check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2);
    }

    // cell -> node connectivity: four indices per cell
    for (int n=0; n<g_ncell; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
            &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    // each edge record holds two node indices followed by two cell indices
    for (int n=0; n<g_nedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1],
            &g_ecell[2*n],&g_ecell[2*n+1]), 4);
    }

    // each boundary-edge record: two node indices, one cell index, one
    // boundary flag
    for (int n=0; n<g_nbedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1],
            &g_becell[n],&g_bound[n]), 4);
    }

    //initialise flow field and residual
    //(g_qold and g_adt are allocated above but not filled before the scatter)

    for (int n=0; n<g_ncell; n++) {
      for (int m=0; m<4; m++) {
        g_q[4*n+m] = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  // number of elements of each set held by this rank
  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  ncell = compute_local_size (g_ncell, comm_size, my_rank);
  nedge = compute_local_size (g_nedge, comm_size, my_rank);
  nbedge = compute_local_size (g_nbedge, comm_size, my_rank);

  op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n"
      ,my_rank,nnode,ncell,nedge,nbedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (double *) malloc(2*nnode*sizeof(double));
  q      = (double *) malloc(4*ncell*sizeof(double));
  qold   = (double *) malloc(4*ncell*sizeof(double));
  res    = (double *) malloc(4*ncell*sizeof(double));
  adt    = (double *) malloc(  ncell*sizeof(double));

  /* scatter sets, mappings and data on sets*/
  scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
  scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1);
  scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1);

  scatter_double_array(g_x, x, comm_size, g_nnode,nnode, 2);
  scatter_double_array(g_q, q, comm_size, g_ncell,ncell, 4);
  scatter_double_array(g_qold, qold, comm_size, g_ncell,ncell, 4);
  scatter_double_array(g_res, res, comm_size, g_ncell,ncell, 4);
  scatter_double_array(g_adt, adt, comm_size, g_ncell,ncell, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if(my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x );
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_printf("Max total file read time = %f\n", wall_t2-wall_t1);

  /**------------------------END I/O and PARTITIONING -----------------------**/

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"double",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"double",q    ,"p_q");
  //op_dat p_qold  = op_decl_dat(cells ,4,"double",qold ,"p_qold");
  //op_dat p_adt   = op_decl_dat(cells ,1,"double",adt  ,"p_adt");
  //op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  // p_res, p_adt and p_qold are now declared as temporary op_dats during
  // the execution of the time-marching loop; as a consequence the local
  // res and adt arrays are not wrapped by any op_dat in this function

  op_decl_const2("gam",1,"double",&gam  );
  op_decl_const2("gm1",1,"double",&gm1  );
  op_decl_const2("cfl",1,"double",&cfl  );
  op_decl_const2("eps",1,"double",&eps  );
  op_decl_const2("mach",1,"double",&mach );
  op_decl_const2("alpha",1,"double",&alpha);
  op_decl_const2("qinf",4,"double",qinf  );

  op_diagnostic_output();

  //trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", cells, pecell, p_x);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  niter = 1000;
  for(int iter=1; iter<=niter; iter++) {

    // the temporaries live for exactly one time step: declared here, freed
    // at the bottom of the loop body. p_res and p_adt are created from a
    // null pointer, p_qold from the local qold array.
    double* tmp_elem = NULL;
    op_dat p_res   = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res");
    op_dat p_adt   = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt");
    op_dat p_qold  = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold");

    //save old flow solution
    op_par_loop_save_soln("save_soln",cells,
               op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
               op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE));

    //  predictor/corrector update loop

    for(int k=0; k<2; k++) {

      //    calculate area/timestep
      op_par_loop_adt_calc("adt_calc",cells,
                 op_arg_dat(p_x,0,pcell,2,"double",OP_READ),
                 op_arg_dat(p_x,1,pcell,2,"double",OP_READ),
                 op_arg_dat(p_x,2,pcell,2,"double",OP_READ),
                 op_arg_dat(p_x,3,pcell,2,"double",OP_READ),
                 op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                 op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE));

      //    calculate flux residual
      op_par_loop_res_calc("res_calc",edges,
                 op_arg_dat(p_x,0,pedge,2,"double",OP_READ),
                 op_arg_dat(p_x,1,pedge,2,"double",OP_READ),
                 op_arg_dat(p_q,0,pecell,4,"double",OP_READ),
                 op_arg_dat(p_q,1,pecell,4,"double",OP_READ),
                 op_arg_dat(p_adt,0,pecell,1,"double",OP_READ),
                 op_arg_dat(p_adt,1,pecell,1,"double",OP_READ),
                 op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
                 op_arg_dat(p_res,1,pecell,4,"double",OP_INC));

      //    boundary-edge contribution to the residual
      op_par_loop_bres_calc("bres_calc",bedges,
                 op_arg_dat(p_x,0,pbedge,2,"double",OP_READ),
                 op_arg_dat(p_x,1,pbedge,2,"double",OP_READ),
                 op_arg_dat(p_q,0,pbecell,4,"double",OP_READ),
                 op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ),
                 op_arg_dat(p_res,0,pbecell,4,"double",OP_INC),
                 op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ));

      //    update flow field

      // reset before each sweep, so only the second sweep's sum is reported
      rms = 0.0;

      op_par_loop_update("update",cells,
                 op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ),
                 op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE),
                 op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
                 op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ),
                 op_arg_gbl(&rms,1,"double",OP_INC));

    }

    //print iteration history (every 100th step), normalised by the global
    //cell count
    rms = sqrt(rms/(double) g_ncell);
    if (iter%100 == 0)
      op_printf("%d  %10.5e \n",iter,rms);

    // release the temporaries; a negative return value is reported as failure
    if (op_free_dat_temp(p_res) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name);
    if (op_free_dat_temp(p_adt) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name);
    if (op_free_dat_temp(p_qold) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();

  //print total time for niter iterations
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();

  // local arrays are released only after OP2 has been shut down
  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
示例#24
0
/*
 * Attach a one-element attribute called `name` to the open dataset `dset_id`
 * and write the int pointed to by `value` into it.
 */
static void op_hdf5_write_int_attr(hid_t dset_id, const char *name, const void *value)
{
  hsize_t dims = 1;
  hid_t space = H5Screate_simple(1, &dims, NULL);
  hid_t attribute = H5Acreate(dset_id, name, H5T_NATIVE_INT, space,
      H5P_DEFAULT, H5P_DEFAULT);
  H5Awrite(attribute, H5T_NATIVE_INT, value);
  H5Aclose(attribute);
  H5Sclose(space);
}

/*
 * Attach the fixed-width (10 byte) string attribute "type" to the open
 * dataset `dset_id`.
 *
 * H5Awrite reads exactly as many bytes as the memory datatype is wide, so the
 * name is first copied into a zero-padded 10-byte buffer; passing a shorter
 * string (e.g. "int") directly would read past its end. Names longer than the
 * field are truncated to the first 10 bytes.
 */
static void op_hdf5_write_type_attr(hid_t dset_id, const char *type_name)
{
  enum { TYPE_ATTR_LEN = 10 };
  char padded[TYPE_ATTR_LEN] = {0};
  size_t len = strlen(type_name);
  if (len > TYPE_ATTR_LEN)
    len = TYPE_ATTR_LEN;
  memcpy(padded, type_name, len);

  hid_t space = H5Screate(H5S_SCALAR);
  hid_t atype = H5Tcopy(H5T_C_S1);
  H5Tset_size(atype, TYPE_ATTR_LEN);
  hid_t attribute = H5Acreate(dset_id, "type", atype, space,
      H5P_DEFAULT, H5P_DEFAULT);
  H5Awrite(attribute, atype, padded);
  H5Aclose(attribute);
  H5Tclose(atype); //release the copied datatype handle
  H5Sclose(space);
}

/*
 * Write every op_set, op_map and op_dat in the global OP_set_list,
 * OP_map_list and OP_dat_list to a newly created (truncated) HDF5 file,
 * using the MPI-IO file driver and collective dataset transfers.
 *
 *  - each set becomes a single-int dataset named after the set, holding the
 *    sum of set->size over all ranks;
 *  - each map / dat becomes a 2D dataset [sum of local sizes] x [dim], into
 *    which every rank writes its local rows at a row offset equal to the sum
 *    of the local sizes of all lower ranks;
 *  - each map / dat dataset carries the attributes "size", "dim" and "type".
 *
 * Must be called by all ranks of MPI_COMM_WORLD, which is duplicated into
 * OP_MPI_HDF5_WORLD for the duration of the call. The maximum wall-clock time
 * over all ranks is printed by the root rank.
 *
 * file_name: path of the HDF5 file to create.
 */
void op_write_hdf5(char const * file_name)
{
  printf("Writing to %s\n",file_name);

  //declare timers
  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  double elapsed;
  double max_time;
  op_timers(&cpu_t1, &wall_t1); //timer start for hdf5 file write

  //create new communicator
  int my_rank, comm_size;
  MPI_Comm_dup(MPI_COMM_WORLD, &OP_MPI_HDF5_WORLD);
  MPI_Comm_rank(OP_MPI_HDF5_WORLD, &my_rank);
  MPI_Comm_size(OP_MPI_HDF5_WORLD, &comm_size);

  //MPI variables
  MPI_Info info  = MPI_INFO_NULL;

  //HDF5 APIs definitions
  hid_t file_id;   //file identifier
  hid_t plist_id;  //property list identifier
  hid_t dset_id;   //dataset identifier
  hid_t dataspace; //data space identifier
  hid_t memspace;  //memory space identifier

  hsize_t dimsf[2];  //dataset dimensions
  hsize_t count[2];  //hyperslab selection parameters
  hsize_t offset[2];

  //Set up file access property list with parallel I/O access
  plist_id = H5Pcreate(H5P_FILE_ACCESS);
  H5Pset_fapl_mpio(plist_id, OP_MPI_HDF5_WORLD, info);

  //Create a new file collectively and release property list identifier.
  file_id = H5Fcreate(file_name, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
  H5Pclose(plist_id);

  /*loop over all the op_sets and write them to file*/
  for(int s=0; s<OP_set_index; s++) {
    op_set set=OP_set_list[s];

    //Create the dataspace for the dataset (a single int).
    hsize_t dimsf_set[] = {1};
    dataspace = H5Screate_simple(1, dimsf_set, NULL);

    //Create the dataset with default properties.
    dset_id = H5Dcreate(file_id, set->name, H5T_NATIVE_INT, dataspace,
        H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

    //Create property list for collective dataset write.
    plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);

    //global set size = sum of the local sizes on all ranks
    int size = 0;
    int* sizes = (int *)xmalloc(sizeof(int)*comm_size);
    MPI_Allgather(&set->size, 1, MPI_INT, sizes, 1, MPI_INT, OP_MPI_HDF5_WORLD);
    for(int i = 0; i<comm_size; i++)size = size + sizes[i];
    free(sizes); //was leaked once per set

    //write data
    H5Dwrite(dset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, plist_id, &size);
    H5Sclose(dataspace);
    H5Pclose(plist_id);
    H5Dclose(dset_id);
  }


  /*loop over all the op_maps and write them to file*/
  for(int m=0; m<OP_map_index; m++) {
    op_map map=OP_map_list[m];

    //Pick the HDF5 element type and its attribute name exactly once, so that
    //the dataset, the write and the "type" attribute always agree.
    hid_t map_h5type = -1;
    const char *map_type_name = NULL;
    if(sizeof(map->map[0]) == sizeof(int)) {
      map_h5type = H5T_NATIVE_INT;
      map_type_name = "int";
    }
    else if(sizeof(map->map[0]) == sizeof(long)) {
      map_h5type = H5T_NATIVE_LONG;
      map_type_name = "long";
    }
    else if(sizeof(map->map[0]) == sizeof(long long)) {
      map_h5type = H5T_NATIVE_LLONG;
      map_type_name = "long long";
    }

    //find total size of map and this rank's row offset
    int* sizes = (int *)xmalloc(sizeof(int)*comm_size);
    int g_size = 0;
    int disp = 0;
    MPI_Allgather(&map->from->size, 1, MPI_INT, sizes, 1, MPI_INT, OP_MPI_HDF5_WORLD);
    for(int i = 0; i<comm_size; i++) {
      if(i < my_rank) disp = disp + sizes[i];
      g_size = g_size + sizes[i];
    }
    free(sizes);

    if(map_type_name == NULL) {
      //no matching HDF5 integer type: nothing can be written for this map
      printf("Unknown type\n");
      continue;
    }

    //Create the dataspace for the dataset.
    dimsf[0] = g_size;
    dimsf[1] = map->dim;
    dataspace = H5Screate_simple(2, dimsf, NULL);

    //Create the dataset with default properties and close dataspace.
    dset_id = H5Dcreate(file_id, map->name, map_h5type, dataspace,
        H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
    H5Sclose(dataspace);

    //Each process defines dataset in memory and writes it to a hyperslab
    //in the file.
    count[0] = map->from->size;
    count[1] = dimsf[1];
    offset[0] = disp;
    offset[1] = 0;
    memspace = H5Screate_simple(2, count, NULL);

    //Select hyperslab in the file.
    dataspace = H5Dget_space(dset_id);
    H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);

    //Create property list for collective dataset write.
    plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);

    //write data
    H5Dwrite(dset_id, map_h5type, memspace, dataspace, plist_id, map->map);

    H5Pclose(plist_id);
    H5Sclose(memspace);
    H5Sclose(dataspace);

    /*attach attributes to map (dataset is still open)*/
    op_hdf5_write_int_attr(dset_id, "size", &g_size);
    op_hdf5_write_int_attr(dset_id, "dim", &map->dim);
    op_hdf5_write_type_attr(dset_id, map_type_name);

    //Close the dataset.
    H5Dclose(dset_id);
  }

  /*loop over all the op_dats and write them to file*/
  for(int d=0; d<OP_dat_index; d++) {
    op_dat dat=OP_dat_list[d];

    //map the dat's type string onto an HDF5 native type
    hid_t dat_h5type = -1;
    int known_type = 1;
    if(strcmp(dat->type,"double")==0)
      dat_h5type = H5T_NATIVE_DOUBLE;
    else if(strcmp(dat->type,"float")==0)
      dat_h5type = H5T_NATIVE_FLOAT;
    else if(strcmp(dat->type,"int")==0)
      dat_h5type = H5T_NATIVE_INT;
    else
      known_type = 0;

    //find total size of dat and this rank's row offset
    int* sizes = (int *)xmalloc(sizeof(int)*comm_size);
    int g_size = 0;
    int disp = 0;
    MPI_Allgather(&dat->set->size, 1, MPI_INT, sizes, 1, MPI_INT, OP_MPI_HDF5_WORLD);
    for(int i = 0; i<comm_size; i++) {
      if(i < my_rank) disp = disp + sizes[i];
      g_size = g_size + sizes[i];
    }
    free(sizes);

    if(!known_type) {
      //No dataset can be created for this dat; skip it instead of carrying
      //on with a stale, already closed dataset identifier.
      printf("Unknown type\n");
      continue;
    }

    //Create the dataspace for the dataset.
    dimsf[0] = g_size;
    dimsf[1] = dat->dim;
    dataspace = H5Screate_simple(2, dimsf, NULL);

    //Create the dataset with default properties and close dataspace.
    dset_id = H5Dcreate(file_id, dat->name, dat_h5type, dataspace,
        H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
    H5Sclose(dataspace);

    //Each process defines dataset in memory and writes it to a hyperslab
    //in the file.
    count[0] = dat->set->size;
    count[1] = dimsf[1];
    offset[0] = disp;
    offset[1] = 0;
    memspace = H5Screate_simple(2, count, NULL);

    //Select hyperslab in the file.
    dataspace = H5Dget_space(dset_id);
    H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);

    //Create property list for collective dataset write.
    plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);

    //write data
    H5Dwrite(dset_id, dat_h5type, memspace, dataspace, plist_id, dat->data);

    H5Pclose(plist_id);
    H5Sclose(memspace);
    H5Sclose(dataspace);

    /*attach attributes to dat (dataset is still open)*/
    op_hdf5_write_int_attr(dset_id, "size", &dat->size);
    op_hdf5_write_int_attr(dset_id, "dim", &dat->dim);
    op_hdf5_write_type_attr(dset_id, dat->type);

    //Close the dataset.
    H5Dclose(dset_id);
  }

  H5Fclose(file_id);

  op_timers(&cpu_t2, &wall_t2);  //timer stop for hdf5 file write
  //compute the maximum hdf5 file write time over all ranks
  elapsed = wall_t2-wall_t1;
  //NOTE(review): MPI_ROOT is presumably redefined by the project as the root
  //rank (0); the MPI-standard MPI_ROOT constant is only meant for
  //intercommunicator collectives - confirm against the project headers.
  MPI_Reduce(&elapsed, &max_time, 1, MPI_DOUBLE, MPI_MAX, MPI_ROOT, OP_MPI_HDF5_WORLD);
  //print performance results
  if(my_rank == MPI_ROOT)
  {
    printf("Max hdf5 file write time = %lf\n\n",max_time);
  }
  MPI_Comm_free(&OP_MPI_HDF5_WORLD);

}