Example #1
0
//extern "C"
/*
 * Thin wrapper around op_decl_dat that forwards heap-allocated copies of
 * the `name` and `type` strings instead of the caller's own buffers.
 *
 *   set, dim, size  forwarded unchanged
 *   type, name      NUL-terminated strings; each is duplicated on the heap
 *   data            dereferenced once; the pointer it holds is forwarded
 *
 * Returns whatever op_decl_dat returns.
 *
 * The two copies are handed to op_decl_dat and are not released here.
 * NOTE(review): presumably op_decl_dat stores these pointers - confirm who
 * is responsible for freeing them.
 * NOTE(review): allocation failure is still not checked (as before).
 */
op_dat op_decl_dat_f ( op_set set, int dim, char const *type,
                       int size, char ** data, char const *name )
{
  /* Lengths include the terminating NUL so the copies are valid C strings
     (the previous code allocated and copied strlen() bytes only, leaving
     the copies unterminated). */
  size_t nameBytes = strlen ( name ) + 1;
  size_t typeBytes = strlen ( type ) + 1;

  char * heapName = (char *) calloc ( nameBytes, sizeof ( char ) );
  char * typeName = (char *) calloc ( typeBytes, sizeof ( char ) );

  memcpy ( heapName, name, nameBytes );
  memcpy ( typeName, type, typeBytes );

  return op_decl_dat ( set, dim, typeName, size, *data, heapName );
}
/*
 * Program entry point: MPI flow solver driver that uses temporary datasets.
 *
 * Sequence of work, in the order it is performed:
 *   1. Initialise the OP library and query this process's MPI rank and the
 *      communicator size.
 *   2. Open new_grid.dat and read the four global set sizes. The process
 *      whose rank equals MPI_ROOT additionally reads the whole grid and
 *      initialises the global flow field and residual.
 *   3. Compute per-process sizes and scatter the global arrays into
 *      per-process arrays (compute_local_size / scatter_*_array are defined
 *      outside this function).
 *   4. Declare sets, maps, datasets and constants, then call op_partition.
 *   5. Run 1000 iterations; each declares three temporary datasets, runs
 *      save_soln followed by two passes of adt_calc / res_calc / bres_calc /
 *      update, and frees the temporaries again.
 *   6. Print timings, call op_exit and free the per-process arrays.
 *
 * gam, gm1, cfl, eps and qinf are assigned here but declared outside this
 * function. No value is returned explicitly.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // per-process mapping tables and data arrays (filled by the scatter calls)
  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  double  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  double  rms;

  /**------------------------BEGIN I/O and PARTITIONING -------------------**/

  op_timers(&cpu_t1, &wall_t1);

  /* read in grid from disk on root processor */
  // Note: the fopen and the header read below sit outside the rank check,
  // so every process executes them; only the bulk read is rank-restricted.
  FILE *fp;

  if ( (fp = fopen("new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  int   g_nnode,g_ncell,g_nedge,g_nbedge;

  // check_scan (defined elsewhere) receives fscanf's return value and the
  // number of items that were expected
  check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  // global arrays; they stay null on every process that does not take the
  // MPI_ROOT branch below
  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  double *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants

  op_printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  // free-stream state used to initialise every cell
  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n"
      ,g_nnode,g_ncell,g_nedge,g_nbedge);

  if(my_rank == MPI_ROOT) {
    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_edge   = (int *) malloc(2*g_nedge*sizeof(int));
    g_ecell  = (int *) malloc(2*g_nedge*sizeof(int));
    g_bedge  = (int *) malloc(2*g_nbedge*sizeof(int));
    g_becell = (int *) malloc(  g_nbedge*sizeof(int));
    g_bound  = (int *) malloc(  g_nbedge*sizeof(int));

    g_x      = (double *) malloc(2*g_nnode*sizeof(double));
    g_q      = (double *) malloc(4*g_ncell*sizeof(double));
    g_qold   = (double *) malloc(4*g_ncell*sizeof(double));
    g_res    = (double *) malloc(4*g_ncell*sizeof(double));
    g_adt    = (double *) malloc(  g_ncell*sizeof(double));

    // file layout after the header: node coordinates, then cell->node,
    // then edge->node with edge->cell, then boundary-edge records
    for (int n=0; n<g_nnode; n++){
      check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2);
    }

    for (int n=0; n<g_ncell; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
            &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    for (int n=0; n<g_nedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1],
            &g_ecell[2*n],&g_ecell[2*n+1]), 4);
    }

    for (int n=0; n<g_nbedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1],
            &g_becell[n],&g_bound[n]), 4);
    }

    //initialise flow field and residual
    // (g_qold and g_adt are allocated above but are not assigned here)

    for (int n=0; n<g_ncell; n++) {
      for (int m=0; m<4; m++) {
        g_q[4*n+m] = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  ncell = compute_local_size (g_ncell, comm_size, my_rank);
  nedge = compute_local_size (g_nedge, comm_size, my_rank);
  nbedge = compute_local_size (g_nbedge, comm_size, my_rank);

  op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n"
      ,my_rank,nnode,ncell,nedge,nbedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (double *) malloc(2*nnode*sizeof(double));
  q      = (double *) malloc(4*ncell*sizeof(double));
  qold   = (double *) malloc(4*ncell*sizeof(double));
  res    = (double *) malloc(4*ncell*sizeof(double));
  adt    = (double *) malloc(  ncell*sizeof(double));

  /* scatter sets, mappings and data on sets*/
  // arguments: global array, local array, communicator size, global element
  // count, local element count, values per element
  scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
  scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1);
  scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1);

  scatter_double_array(g_x, x, comm_size, g_nnode,nnode, 2);
  scatter_double_array(g_q, q, comm_size, g_ncell,ncell, 4);
  scatter_double_array(g_qold, qold, comm_size, g_ncell,ncell, 4);
  scatter_double_array(g_res, res, comm_size, g_ncell,ncell, 4);
  scatter_double_array(g_adt, adt, comm_size, g_ncell,ncell, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if(my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x );
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_printf("Max total file read time = %f\n", wall_t2-wall_t1);

  /**------------------------END I/O and PARTITIONING -----------------------**/

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"double",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"double",q    ,"p_q");
  //op_dat p_qold  = op_decl_dat(cells ,4,"double",qold ,"p_qold");
  //op_dat p_adt   = op_decl_dat(cells ,1,"double",adt  ,"p_adt");
  //op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  // p_res, p_adt and p_qold  now declared as a temp op_dats during
  // the execution of the time-marching loop
  // (the local res and adt arrays are therefore not wrapped by any op_dat
  // in this function; they are only scattered into and freed at the end)

  op_decl_const2("gam",1,"double",&gam  );
  op_decl_const2("gm1",1,"double",&gm1  );
  op_decl_const2("cfl",1,"double",&cfl  );
  op_decl_const2("eps",1,"double",&eps  );
  op_decl_const2("mach",1,"double",&mach );
  op_decl_const2("alpha",1,"double",&alpha);
  op_decl_const2("qinf",4,"double",qinf  );

  op_diagnostic_output();

  //trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", cells, pecell, p_x);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  niter = 1000;
  for(int iter=1; iter<=niter; iter++) {

    // temporaries are declared at the top of every iteration and released
    // at its end; p_res and p_adt are given a null data pointer, p_qold is
    // given the local qold array
    double* tmp_elem = NULL;
    op_dat p_res   = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res");
    op_dat p_adt   = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt");
    op_dat p_qold  = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold");

    //save old flow solution
    op_par_loop_save_soln("save_soln",cells,
               op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
               op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE));

    //  predictor/corrector update loop

    for(int k=0; k<2; k++) {

      //    calculate area/timestep
      op_par_loop_adt_calc("adt_calc",cells,
                 op_arg_dat(p_x,0,pcell,2,"double",OP_READ),
                 op_arg_dat(p_x,1,pcell,2,"double",OP_READ),
                 op_arg_dat(p_x,2,pcell,2,"double",OP_READ),
                 op_arg_dat(p_x,3,pcell,2,"double",OP_READ),
                 op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                 op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE));

      //    calculate flux residual
      op_par_loop_res_calc("res_calc",edges,
                 op_arg_dat(p_x,0,pedge,2,"double",OP_READ),
                 op_arg_dat(p_x,1,pedge,2,"double",OP_READ),
                 op_arg_dat(p_q,0,pecell,4,"double",OP_READ),
                 op_arg_dat(p_q,1,pecell,4,"double",OP_READ),
                 op_arg_dat(p_adt,0,pecell,1,"double",OP_READ),
                 op_arg_dat(p_adt,1,pecell,1,"double",OP_READ),
                 op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
                 op_arg_dat(p_res,1,pecell,4,"double",OP_INC));

      // boundary-edge contribution to the residual
      op_par_loop_bres_calc("bres_calc",bedges,
                 op_arg_dat(p_x,0,pbedge,2,"double",OP_READ),
                 op_arg_dat(p_x,1,pbedge,2,"double",OP_READ),
                 op_arg_dat(p_q,0,pbecell,4,"double",OP_READ),
                 op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ),
                 op_arg_dat(p_res,0,pbecell,4,"double",OP_INC),
                 op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ));

      //    update flow field

      // rms is reset on each pass, so the value used after this loop is the
      // one accumulated during the final (k == 1) pass
      rms = 0.0;

      op_par_loop_update("update",cells,
                 op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ),
                 op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE),
                 op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
                 op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ),
                 op_arg_gbl(&rms,1,"double",OP_INC));

    }

    //print iteration history
    // normalised by the global cell count; printed every 100th iteration
    rms = sqrt(rms/(double) g_ncell);
    if (iter%100 == 0)
      op_printf("%d  %10.5e \n",iter,rms);

    // a negative return from op_free_dat_temp is reported but not fatal
    if (op_free_dat_temp(p_res) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name);
    if (op_free_dat_temp(p_adt) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name);
    if (op_free_dat_temp(p_qold) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();

  //print total time for niter iterations
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();

  // release the per-process arrays allocated before the scatter
  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
Example #3
0
/*
 * Single-process driver.
 *
 * Builds an edge list over an (NN-1) x (NN-1) grid of nodes, declares the
 * matching sets / map / datasets, runs NITER rounds of the "res" and
 * "update" parallel loops, prints the u field and returns the value
 * produced by check_result<float>.
 */
int main(int argc, char **argv)
{
  // the OP library is started before any other work is done
  op_init(argc,argv,5);

  const int nnode = (NN-1)*(NN-1);
  const int nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2);

  // edge -> node table (two entries per edge) and per-edge / per-node data
  int   *pp = (int *)malloc(sizeof(int)*2*nedge);
  float *A  = (float *)malloc(sizeof(float)*nedge);
  float *r  = (float *)malloc(sizeof(float)*nnode);
  float *u  = (float *)malloc(sizeof(float)*nnode);
  float *du = (float *)malloc(sizeof(float)*nnode);

  // neighbour offsets, visited in the order i-1, i+1, j-1, j+1
  const int di[4] = { -1, 1,  0, 0 };
  const int dj[4] = {  0, 0, -1, 1 };

  // fill the matrix entries and right-hand side
  int next_edge = 0;

  for (int i=1; i<NN; i++) {
    for (int j=1; j<NN; j++) {
      const int node = i-1 + (j-1)*(NN-1);

      r[node]  = 0.0f;
      u[node]  = 0.0f;
      du[node] = 0.0f;

      // self edge with coefficient -1
      pp[2*next_edge]   = node;
      pp[2*next_edge+1] = node;
      A[next_edge]      = -1.0f;
      next_edge++;

      for (int dir=0; dir<4; dir++) {
        const int ni = i + di[dir];
        const int nj = j + dj[dir];
        const int inside = (ni!=0) && (ni!=NN) && (nj!=0) && (nj!=NN);

        if (!inside) {
          // neighbour index falls on 0 or NN: contributes to r instead of
          // producing an edge
          r[node] += 0.25f;
          continue;
        }

        pp[2*next_edge]   = node;
        pp[2*next_edge+1] = ni-1 + (nj-1)*(NN-1);
        A[next_edge]      = 0.25f;
        next_edge++;
      }
    }
  }

  // sets, map and datasets handed to the OP library

  op_set nodes = op_decl_set(nnode, "nodes");
  op_set edges = op_decl_set(nedge, "edges");

  op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge");

  op_dat p_A  = op_decl_dat(edges,1,"float",A,  "p_A" );
  op_dat p_r  = op_decl_dat(nodes,1,"float",r,  "p_r" );
  op_dat p_u  = op_decl_dat(nodes,1,"float",u,  "p_u" );
  op_dat p_du = op_decl_dat(nodes,1,"float",du, "p_du");

  // alpha is declared outside this function
  alpha = 1.0f;
  op_decl_const2("alpha",1,"float",&alpha);

  op_diagnostic_output();

  // iteration loop: one "res" sweep over edges, one "update" sweep over nodes

  float beta = 1.0f;
  float u_sum;
  float u_max;

  for (int it=0; it<NITER; it++) {
    op_par_loop_res("res",edges,
               op_arg_dat(p_A,-1,OP_ID,1,"float",OP_READ),
               op_arg_dat(p_u,1,ppedge,1,"float",OP_READ),
               op_arg_dat(p_du,0,ppedge,1,"float",OP_INC),
               op_arg_gbl(&beta,1,"float",OP_READ));

    // reduction targets are cleared before every update sweep
    u_max = 0.0f;
    u_sum = 0.0f;
    op_par_loop_update("update",nodes,
               op_arg_dat(p_r,-1,OP_ID,1,"float",OP_READ),
               op_arg_dat(p_du,-1,OP_ID,1,"float",OP_RW),
               op_arg_dat(p_u,-1,OP_ID,1,"float",OP_INC),
               op_arg_gbl(&u_sum,1,"float",OP_INC),
               op_arg_gbl(&u_max,1,"float",OP_MAX));
    op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode));
  }

  // report the final field

  op_printf("\n  Results after %d iterations:\n\n",NITER);

  op_fetch_data(p_u, u);

  // only pass 0 (the u field) runs with the current bound; passes 1 and 2
  // would select du and r respectively
  for (int pass=0; pass<1; pass++) {
    const float *field = NULL;
    switch (pass) {
      case 0: field = u;  break;
      case 1: field = du; break;
      case 2: field = r;  break;
      default: break;
    }

    // rows are printed from j = NN-1 down to j = 1
    for (int j=NN-1; j>0; j--) {
      for (int i=1; i<NN; i++) {
        if (field != NULL)
          op_printf(" %7.4f",field[i-1 + (j-1)*(NN-1)]);
      }
      op_printf("\n");
    }
    op_printf("\n");
  }

  op_timing_output();
  int result = check_result<float>(u, NN, TOLERANCE);
  op_exit();

  free(pp);
  free(A);
  free(u);
  free(du);
  free(r);

  return result;
}
Example #4
0
/*
 * Program entry point: MPI variant of the sparse iteration driver.
 *
 * The process whose rank equals MPI_ROOT builds the global edge list and
 * data for an (NN-1) x (NN-1) grid of nodes; the arrays are then scattered
 * to per-process arrays, declared to the OP library, partitioned, and
 * iterated NITER times with the "res" and "update" kernels. The resulting
 * p_u dataset is written to out_grid.dat.
 *
 * NN, NITER and alpha are defined outside this function.
 *
 * NOTE(review): the per-process arrays pp, A, r, u, du and the op_dat
 * returned by op_mpi_get_data are not released in this function - confirm
 * whether the library owns them before adding free() calls.
 * No value is returned explicitly.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // per-process edge->node table and data arrays
  int *pp;
  double *A, *r, *u, *du;

  int   nnode, nedge;

  /**------------------------BEGIN I/O and PARTITIONING ---------------------**/

  int g_nnode, g_nedge, g_n, g_e;

  // one node per grid point; the edge count is one self-edge per node plus
  // the neighbour edges
  g_nnode = (NN-1)*(NN-1);
  g_nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2);

  // global arrays; they stay null on processes that skip the branch below
  int *g_pp = 0;
  double *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0;

  op_printf("Global number of nodes, edges = %d, %d\n",g_nnode,g_nedge);

  if(my_rank == MPI_ROOT) {
    g_pp = (int *)malloc(sizeof(int)*2*g_nedge);

    g_A  = (double *)malloc(sizeof(double)*g_nedge);
    g_r  = (double *)malloc(sizeof(double)*g_nnode);
    g_u  = (double *)malloc(sizeof(double)*g_nnode);
    g_du = (double *)malloc(sizeof(double)*g_nnode);

    // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning

    g_e = 0;

    for (int i=1; i<NN; i++) {
      for (int j=1; j<NN; j++) {
        g_n         = i-1 + (j-1)*(NN-1);
        g_r[g_n]      = 0.0f;
        g_u[g_n]      = 0.0f;
        g_du[g_n]     = 0.0f;

        // self edge with coefficient -1
        g_pp[2*g_e]   = g_n;
        g_pp[2*g_e+1] = g_n;
        g_A[g_e]      = -1.0f;
        g_e++;

        // the four passes visit the neighbours i-1, i+1, j-1, j+1 in turn
        for (int pass=0; pass<4; pass++) {
          int i2 = i;
          int j2 = j;
          if (pass==0) i2 += -1;
          if (pass==1) i2 +=  1;
          if (pass==2) j2 += -1;
          if (pass==3) j2 +=  1;

          // a neighbour index of 0 or NN adds 0.25 to the right-hand side;
          // any other neighbour becomes an edge with coefficient 0.25
          if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) {
            g_r[g_n] += 0.25f;
          }
          else {
            g_pp[2*g_e]   = g_n;
            g_pp[2*g_e+1] = i2-1 + (j2-1)*(NN-1);
            g_A[g_e]      = 0.25f;
            g_e++;
          }
        }
      }
    }
  }

  /* Compute local sizes */
  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  nedge = compute_local_size (g_nedge, comm_size, my_rank);
  op_printf("Number of nodes, edges on process %d = %d, %d\n"
      ,my_rank,nnode,nedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  pp = (int *)malloc(2*sizeof(int)*nedge);

  A      = (double *) malloc(nedge*sizeof(double));
  r      = (double *) malloc(nnode*sizeof(double));
  u      = (double *) malloc(nnode*sizeof(double));
  du     = (double *) malloc(nnode*sizeof(double));

  /* scatter sets, mappings and data on sets*/
  // arguments: global array, local array, communicator size, global element
  // count, local element count, values per element
  scatter_int_array(g_pp, pp, comm_size, g_nedge,nedge, 2);
  scatter_double_array(g_A, A, comm_size, g_nedge,nedge, 1);
  scatter_double_array(g_r, r, comm_size, g_nnode,nnode, 1);
  scatter_double_array(g_u, u, comm_size, g_nnode,nnode, 1);
  scatter_double_array(g_du, du, comm_size, g_nnode,nnode, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if(my_rank == MPI_ROOT) {
    free(g_pp);
    free(g_A);
    free(g_r);
    free(g_u);
    free(g_du);
  }

  /**------------------------END I/O and PARTITIONING ---------------------**/

  // declare sets, pointers, and datasets

  op_set nodes = op_decl_set(nnode,"nodes");
  op_set edges = op_decl_set(nedge,"edges");

  op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge");

  op_dat p_A = op_decl_dat(edges,1,"double", A,  "p_A" );
  op_dat p_r = op_decl_dat(nodes,1,"double", r,  "p_r" );
  op_dat p_u = op_decl_dat(nodes,1,"double", u,  "p_u" );
  op_dat p_du = op_decl_dat(nodes,1,"double", du,"p_du");

  alpha = 1.0f;
  op_decl_const(1,"double",&alpha);

  op_diagnostic_output();

  //trigger partitioning and halo creation routines
  // (no set, map or coordinate dataset is supplied here)
  op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main iteration loop

  double u_sum, u_max, beta = 1.0f;

  for (int iter=0; iter<NITER; iter++) {
    op_par_loop(res,"res", edges,
        op_arg_dat(p_A,  -1,OP_ID,  1,"double", OP_READ),
        op_arg_dat(p_u,   1,ppedge, 1,"double", OP_READ),
        op_arg_dat(p_du,  0,ppedge, 1,"double", OP_INC),
        op_arg_gbl(&beta, 1,"double", OP_READ));

    // reduction targets are cleared before every update sweep
    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop(update,"update", nodes,
        op_arg_dat(p_r,   -1,OP_ID, 1,"double",OP_READ),
        op_arg_dat(p_du,  -1,OP_ID, 1,"double",OP_RW),
        op_arg_dat(p_u,   -1,OP_ID, 1,"double",OP_INC),
        op_arg_gbl(&u_sum,1,"double",OP_INC),
        op_arg_gbl(&u_max,1,"double",OP_MAX));

    // the sum is normalised by the global node count
    op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/g_nnode));
  }

  op_timers(&cpu_t2, &wall_t2);

  //get results data array
  op_dat temp = op_mpi_get_data(p_u);

  //output the result dat array to files
  print_dat_tofile(temp, "out_grid.dat"); //ASCII
  //print_dat_tobinfile(temp, "out_grid.bin"); //Binary

  //print each mpi process's timing info for each kernel
  op_timing_output();

  //print total time for niter iterations
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();
}
/*
 * Program entry point: single-process, single-precision flow solver driver.
 *
 * Reads the whole grid from new_grid.dat with explicit fscanf checks,
 * initialises the flow field, then initialises the OP library, declares
 * sets / maps / datasets / constants and runs 1000 iterations of save_soln
 * followed by two passes of adt_calc / res_calc / bres_calc / update.
 *
 * gam, gm1, cfl, eps and qinf are assigned here but declared outside this
 * function. Any file error prints a message and calls exit(-1).
 *
 * NOTE(review): this function neither calls op_exit nor frees the arrays
 * it allocates, and has no explicit return - confirm that is intended.
 */
int main(int argc, char **argv){

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  float  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  float  rms;

  // read in grid

  printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("new_grid.dat","r")) == NULL) {
    printf("can't open file new_grid.dat\n"); exit(-1);
  }

  // header: node, cell, edge and boundary-edge counts
  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) {
    printf("error reading from new_grid.dat\n"); exit(-1);
  }

  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (float *) malloc(2*nnode*sizeof(float));
  q      = (float *) malloc(4*ncell*sizeof(float));
  qold   = (float *) malloc(4*ncell*sizeof(float));
  res    = (float *) malloc(4*ncell*sizeof(float));
  adt    = (float *) malloc(  ncell*sizeof(float));

  // node coordinates: two values per node
  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // cell -> node table: four entries per cell
  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
                                   &cell[4*n+2], &cell[4*n+3]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // each edge record holds two node indices followed by two cell indices
  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
                                   &ecell[2*n],&ecell[2*n+1]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  // each boundary-edge record holds two node indices, one cell index and
  // one value stored in bound
  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
                                   &becell[n], &bound[n]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // set constants and initialise flow field and residual

  printf("initialising flow field \n");

  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  float mach  = 0.4f;
  float alpha = 3.0f*atan(1.0f)/45.0f;  
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(gam*p/r)*mach;
  float e     = p/(r*gm1) + 0.5f*u*u;

  // free-stream state used to initialise every cell
  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  // q starts at the free-stream state, res at zero; qold and adt are left
  // unassigned here
  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
        q[4*n+m] = qinf[m];
      res[4*n+m] = 0.0f;
    }
  }

  // OP initialisation

  printf("OP init\n");
  op_init(argc,argv,7);

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"float",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"float",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"float",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"float",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"float",res  ,"p_res");

  op_decl_const(1,"float",&gam  );
  op_decl_const(1,"float",&gm1  );
  op_decl_const(1,"float",&cfl  );
  op_decl_const(1,"float",&eps  );
  op_decl_const(1,"float",&mach );
  op_decl_const(1,"float",&alpha);
  op_decl_const(4,"float",qinf  );

  // declared but never assigned or referenced again in this function
  // NOTE(review): every op_par_loop below passes NULL as its last argument;
  // presumably that slot is meant for a tuner - confirm
  op_tuner *OP_tuner;

  op_diagnostic_output();

// main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

//  save old flow solution

    op_par_loop(save_soln,"save_soln", cells,
                op_arg_dat(p_q,   -1,OP_ID, 4,"float",OP_READ ),
                op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE),
		NULL);

//  predictor/corrector update loop

    for(int k=0; k<2; k++) {

//    calculate area/timestep

      op_par_loop(adt_calc,"adt_calc",cells,
                  op_arg_dat(p_x,   0,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_x,   1,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_x,   2,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_x,   3,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_q,  -1,OP_ID, 4,"float",OP_READ ),
                  op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE),
		  NULL);

//    calculate flux residual

      op_par_loop(res_calc,"res_calc",edges,
                  op_arg_dat(p_x,    0,pedge, 2,"float",OP_READ),
                  op_arg_dat(p_x,    1,pedge, 2,"float",OP_READ),
                  op_arg_dat(p_q,    0,pecell,4,"float",OP_READ),
                  op_arg_dat(p_q,    1,pecell,4,"float",OP_READ),
                  op_arg_dat(p_adt,  0,pecell,1,"float",OP_READ),
                  op_arg_dat(p_adt,  1,pecell,1,"float",OP_READ),
                  op_arg_dat(p_res,  0,pecell,4,"float",OP_INC ),
                  op_arg_dat(p_res,  1,pecell,4,"float",OP_INC ),
		  NULL);

//    boundary-edge contribution to the residual

      op_par_loop(bres_calc,"bres_calc",bedges,
                  op_arg_dat(p_x,     0,pbedge, 2,"float",OP_READ),
                  op_arg_dat(p_x,     1,pbedge, 2,"float",OP_READ),
                  op_arg_dat(p_q,     0,pbecell,4,"float",OP_READ),
                  op_arg_dat(p_adt,   0,pbecell,1,"float",OP_READ),
                  op_arg_dat(p_res,   0,pbecell,4,"float",OP_INC ),
                  op_arg_dat(p_bound,-1,OP_ID  ,1,"int",  OP_READ),
		  NULL);

//    update flow field

      // rms is reset on each pass, so the value used after this loop is the
      // one accumulated during the final (k == 1) pass
      rms = 0.0;

      op_par_loop(update,"update",cells,
                  op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ),
                  op_arg_dat(p_q,   -1,OP_ID, 4,"float",OP_WRITE),
                  op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW   ),
                  op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ),
                  op_arg_gbl(&rms,1,"float",OP_INC), 
		  NULL);
    }

//  print iteration history (every 100th iteration)

    rms = sqrt(rms/(float) ncell);

    if (iter%100 == 0)
      printf(" %d  %10.5e \n",iter,rms);
  }


  op_timing_output();
}
Example #6
0
/*
 * Program entry point: MPI test of global reductions.
 *
 * Reads new_grid.dat, scatters the grid across processes, declares only the
 * edges and cells sets, the edge->cell map and the p_res dataset, and then
 * runs two parallel loops that each increment an integer `count` through
 * op_arg_gbl(..., OP_INC):
 *   - "res_calc" over edges (p_res accessed through pecell): count is
 *     compared with the global edge count;
 *   - "update" over cells (p_res accessed directly): count is compared
 *     with the global cell count.
 * PASSED / FAILED is printed for each comparison.
 *
 * NOTE(review): the global data arrays g_q, g_qold, g_res and g_adt are
 * allocated and scattered without ever being assigned, so the res array
 * behind p_res holds unspecified values - confirm the kernels do not
 * depend on its contents.
 * niter is declared but unused; no value is returned explicitly.
 */
int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 2);

  // MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // per-process mapping tables and data arrays (filled by the scatter calls)
  int *becell, *ecell, *bound, *bedge, *edge, *cell;
  double *x, *q, *qold, *adt, *res;

  int nnode, ncell, nedge, nbedge, niter;

  /**------------------------BEGIN I/O and PARTITIONING -------------------**/

  op_timers(&cpu_t1, &wall_t1);

  /* read in grid from disk on root processor */
  // Note: the fopen and the header read below sit outside the rank check,
  // so every process executes them; only the bulk read is rank-restricted.
  FILE *fp;

  if ((fp = fopen("new_grid.dat", "r")) == NULL) {
    op_printf("can't open file new_grid.dat\n");
    exit(-1);
  }

  int g_nnode, g_ncell, g_nedge, g_nbedge;

  // check_scan (defined elsewhere) receives fscanf's return value and the
  // number of items that were expected
  check_scan(
      fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  // global arrays; they stay null on processes that skip the branch below
  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0,
      *g_cell = 0;
  double *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n",
            g_nnode, g_ncell, g_nedge, g_nbedge);

  if (my_rank == MPI_ROOT) {
    g_cell = (int *)malloc(4 * g_ncell * sizeof(int));
    g_edge = (int *)malloc(2 * g_nedge * sizeof(int));
    g_ecell = (int *)malloc(2 * g_nedge * sizeof(int));
    g_bedge = (int *)malloc(2 * g_nbedge * sizeof(int));
    g_becell = (int *)malloc(g_nbedge * sizeof(int));
    g_bound = (int *)malloc(g_nbedge * sizeof(int));

    g_x = (double *)malloc(2 * g_nnode * sizeof(double));
    g_q = (double *)malloc(4 * g_ncell * sizeof(double));
    g_qold = (double *)malloc(4 * g_ncell * sizeof(double));
    g_res = (double *)malloc(4 * g_ncell * sizeof(double));
    g_adt = (double *)malloc(g_ncell * sizeof(double));

    // file layout after the header: node coordinates, then cell->node,
    // then edge->node with edge->cell, then boundary-edge records
    for (int n = 0; n < g_nnode; n++) {
      check_scan(fscanf(fp, "%lf %lf \n", &g_x[2 * n], &g_x[2 * n + 1]), 2);
    }

    for (int n = 0; n < g_ncell; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n],
                        &g_cell[4 * n + 1], &g_cell[4 * n + 2],
                        &g_cell[4 * n + 3]),
                 4);
    }

    for (int n = 0; n < g_nedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2 * n],
                        &g_edge[2 * n + 1], &g_ecell[2 * n],
                        &g_ecell[2 * n + 1]),
                 4);
    }

    for (int n = 0; n < g_nbedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2 * n],
                        &g_bedge[2 * n + 1], &g_becell[n], &g_bound[n]),
                 4);
    }

    // initialise flow field and residual
    // (no initialisation code follows: g_q, g_qold, g_res and g_adt keep
    // whatever malloc returned)
  }

  fclose(fp);

  nnode = compute_local_size(g_nnode, comm_size, my_rank);
  ncell = compute_local_size(g_ncell, comm_size, my_rank);
  nedge = compute_local_size(g_nedge, comm_size, my_rank);
  nbedge = compute_local_size(g_nbedge, comm_size, my_rank);

  op_printf(
      "Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n",
      my_rank, nnode, ncell, nedge, nbedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  cell = (int *)malloc(4 * ncell * sizeof(int));
  edge = (int *)malloc(2 * nedge * sizeof(int));
  ecell = (int *)malloc(2 * nedge * sizeof(int));
  bedge = (int *)malloc(2 * nbedge * sizeof(int));
  becell = (int *)malloc(nbedge * sizeof(int));
  bound = (int *)malloc(nbedge * sizeof(int));

  x = (double *)malloc(2 * nnode * sizeof(double));
  q = (double *)malloc(4 * ncell * sizeof(double));
  qold = (double *)malloc(4 * ncell * sizeof(double));
  res = (double *)malloc(4 * ncell * sizeof(double));
  adt = (double *)malloc(ncell * sizeof(double));

  /* scatter sets, mappings and data on sets*/
  // arguments: global array, local array, communicator size, global element
  // count, local element count, values per element
  scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4);
  scatter_int_array(g_edge, edge, comm_size, g_nedge, nedge, 2);
  scatter_int_array(g_ecell, ecell, comm_size, g_nedge, nedge, 2);
  scatter_int_array(g_bedge, bedge, comm_size, g_nbedge, nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1);
  scatter_int_array(g_bound, bound, comm_size, g_nbedge, nbedge, 1);

  scatter_double_array(g_x, x, comm_size, g_nnode, nnode, 2);
  scatter_double_array(g_q, q, comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_qold, qold, comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_res, res, comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_adt, adt, comm_size, g_ncell, ncell, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if (my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x);
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_printf("Max total file read time = %f\n", wall_t2 - wall_t1);

  /**------------------------END I/O and PARTITIONING -----------------------**/

  // only the two sets, one map and one dataset used by the test are
  // declared; the remaining local arrays are scattered into but not used
  op_set edges = op_decl_set(nedge, "edges");
  op_set cells = op_decl_set(ncell, "cells");

  op_map pecell = op_decl_map(edges, cells, 2, ecell, "pecell");
  op_dat p_res = op_decl_dat(cells, 4, "double", res, "p_res");

  int count;

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", cells, pecell, NULL);

  op_diagnostic_output();

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // indirect reduction
  // count is expected to come back equal to the global number of edges
  count = 0;
  op_par_loop_res_calc("res_calc", edges,
                       op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC),
                       op_arg_gbl(&count, 1, "int", OP_INC));
  op_printf("number of edges:: %d should be: %d \n", count, g_nedge);
  if (count != g_nedge)
    op_printf("indirect reduction FAILED\n");
  else
    op_printf("indirect reduction PASSED\n");
  // direct reduction
  // count is expected to come back equal to the global number of cells
  count = 0;
  op_par_loop_update("update", cells,
                     op_arg_dat(p_res, -1, OP_ID, 4, "double", OP_RW),
                     op_arg_gbl(&count, 1, "int", OP_INC));
  op_printf("number of cells: %d should be: %d \n", count, g_ncell);
  if (count != g_ncell)
    op_printf("direct reduction FAILED\n");
  else
    op_printf("direct reduction PASSED\n");

  // the elapsed time taken here is not printed by this function
  op_timers(&cpu_t2, &wall_t2);

  op_timing_output();

  op_exit();

  // release the per-process arrays allocated before the scatter
  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
int main(int argc,char *argv[])
{
  int *becell;
  int *ecell;
  int *bound;
  int *bedge;
  int *edge;
  int *cell;
  float *x;
  float *q;
  float *qold;
  float *adt;
  float *res;
  int nnode;
  int ncell;
  int nedge;
  int nbedge;
  int niter;
  float rms;
  if (argc != 2) {
    printf("Usage: airfoil <grid>\n");
    exit(1);
  }
// read in grid
  printf("reading in grid \n");
  char *grid = argv[1];
  FILE *fp;
  if ((fp = fopen(grid,"r")) == 0L) {
    printf("can\'t open file %s\n",grid);
    exit((-1));
  }
  if (fscanf(fp,"%d %d %d %d \n",&nnode,&ncell,&nedge,&nbedge) != 4) {
    printf("error reading from %s\n",grid);
    exit((-1));
  }
  cell = ((int *)(malloc(((4 * ncell) * (sizeof(int ))))));
  edge = ((int *)(malloc(((2 * nedge) * (sizeof(int ))))));
  ecell = ((int *)(malloc(((2 * nedge) * (sizeof(int ))))));
  bedge = ((int *)(malloc(((2 * nbedge) * (sizeof(int ))))));
  becell = ((int *)(malloc((nbedge * (sizeof(int ))))));
  bound = ((int *)(malloc((nbedge * (sizeof(int ))))));
  x = ((float *)(malloc(((2 * nnode) * (sizeof(float ))))));
  q = ((float *)(malloc(((4 * ncell) * (sizeof(float ))))));
  qold = ((float *)(malloc(((4 * ncell) * (sizeof(float ))))));
  res = ((float *)(malloc(((4 * ncell) * (sizeof(float ))))));
  adt = ((float *)(malloc((ncell * (sizeof(float ))))));
  for (int n = 0; n < nnode; n++) {
    if (fscanf(fp,"%f %f \n",(x + (2 * n)),(x + ((2 * n) + 1))) != 2) {
      printf("error reading from new_grid.dat\n");
      exit((-1));
    }
  }
  for (int n = 0; n < ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",(cell + (4 * n)),(cell + ((4 * n) + 1)),(cell + ((4 * n) + 2)),(cell + ((4 * n) + 3))) != 4) {
      printf("error reading from new_grid.dat\n");
      exit((-1));
    }
  }
  for (int n = 0; n < nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",(edge + (2 * n)),(edge + ((2 * n) + 1)),(ecell + (2 * n)),(ecell + ((2 * n) + 1))) != 4) {
      printf("error reading from new_grid.dat\n");
      exit((-1));
    }
  }
  for (int n = 0; n < nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",(bedge + (2 * n)),(bedge + ((2 * n) + 1)),(becell + n),(bound + n)) != 4) {
      printf("error reading from new_grid.dat\n");
      exit((-1));
    }
  }
  fclose(fp);
// set constants and initialise flow field and residual
  printf("initialising flow field \n");
  gam = 1.4f;
  gm1 = (gam - 1.0f);
  cfl = 0.9f;
  eps = 0.05f;
  float mach = 0.4f;
  float alpha = ((3.0f * atan(1.0f)) / 45.0f);
  float p = 1.0f;
  float r = 1.0f;
  float u = (sqrt(((gam * p) / r)) * mach);
  float e = ((p / (r * gm1)) + ((0.5f * u) * u));
  qinf[0] = r;
  qinf[1] = (r * u);
  qinf[2] = 0.0f;
  qinf[3] = (r * e);
  for (int n = 0; n < ncell; n++) {
    for (int m = 0; m < 4; m++) {
      q[(4 * n) + m] = qinf[m];
      res[(4 * n) + m] = 0.0f;
    }
  }
// OP initialisation
  op_init(argc,argv,2);
// declare sets, pointers, datasets and global constants
  op_set nodes = op_decl_set(nnode,"nodes");
  op_set edges = op_decl_set(nedge,"edges");
  op_set bedges = op_decl_set(nbedge,"bedges");
  op_set cells = op_decl_set(ncell,"cells");
  op_map pedge = op_decl_map(edges,nodes,2,edge,"pedge");
  op_map pecell = op_decl_map(edges,cells,2,ecell,"pecell");
  op_map pbedge = op_decl_map(bedges,nodes,2,bedge,"pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell = op_decl_map(cells,nodes,4,cell,"pcell");
  op_dat p_bound = op_decl_dat(bedges,1,"int",bound,"p_bound");
  op_dat p_x = op_decl_dat(nodes,2,"float",x,"p_x");
  op_dat p_q = op_decl_dat(cells,4,"float",q,"p_q");
  op_dat p_qold = op_decl_dat(cells,4,"float",qold,"p_qold");
  op_dat p_adt = op_decl_dat(cells,1,"float",adt,"p_adt");
  op_dat p_res = op_decl_dat(cells,4,"float",res,"p_res");
  op_decl_const(1,"float",&gam);
  op_decl_const(1,"float",&gm1);
  op_decl_const(1,"float",&cfl);
  op_decl_const(1,"float",&eps);
  op_decl_const(1,"float",&mach);
  op_decl_const(1,"float",&alpha);
  op_decl_const(4,"float",qinf);
  op_diagnostic_output();
// main time-marching loop
  niter = 1000;
  for (int iter = 1; iter <= niter; iter++) {
//  save old flow solution
    save_soln_host("save_soln_modified",cells,op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_WRITE));
//  predictor/corrector update loop
    for (int k = 0; k < 2; k++) {
//    calculate area/timstep
      adt_calc_host("adt_calc_modified",cells,op_arg_dat(p_x,0,pcell,2,"float",OP_READ),op_arg_dat(p_x,1,pcell,2,"float",OP_READ),op_arg_dat(p_x,2,pcell,2,"float",OP_READ),op_arg_dat(p_x,3,pcell,2,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_WRITE));
//    calculate flux residual
      res_calc_host("res_calc_modified",edges,op_arg_dat(p_x,0,pedge,2,"float",OP_READ),op_arg_dat(p_x,1,pedge,2,"float",OP_READ),op_arg_dat(p_q,0,pecell,4,"float",OP_READ),op_arg_dat(p_q,1,pecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pecell,1,"float",OP_READ),op_arg_dat(p_adt,1,pecell,1,"float",OP_READ),op_arg_dat(p_res,0,pecell,4,"float",OP_INC),op_arg_dat(p_res,1,pecell,4,"float",OP_INC));
      bres_calc_host("bres_calc_modified",bedges,op_arg_dat(p_x,0,pbedge,2,"float",OP_READ),op_arg_dat(p_x,1,pbedge,2,"float",OP_READ),op_arg_dat(p_q,0,pbecell,4,"float",OP_READ),op_arg_dat(p_adt,0,pbecell,1,"float",OP_READ),op_arg_dat(p_res,0,pbecell,4,"float",OP_INC),op_arg_dat(p_bound,(-1), OP_ID,1,"int",OP_READ));
//    update flow field
      rms = 0.0;
      update_host("update_modified",cells,op_arg_dat(p_qold,(-1), OP_ID,4,"float",OP_READ),op_arg_dat(p_q,(-1), OP_ID,4,"float",OP_WRITE),op_arg_dat(p_res,(-1), OP_ID,4,"float",OP_RW),op_arg_dat(p_adt,(-1), OP_ID,1,"float",OP_READ),op_arg_gbl(&rms,1,"float",OP_INC));
    }
//  print iteration history
    rms = (sqrt((rms / ((float )ncell))));
    if ((iter % 100) == 0) 
      printf(" %d  %10.5e \n",iter,rms);
  }
/*  for (int ll = 0; ll < (4 * ncell); ll++) 
    printf("%lf\n",q[ll]);*/
  op_timing_output();
  return 0;
}
Example #8
0
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  double  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  double  rms;

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // read in grid

  op_printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("./new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) {
    op_printf("error reading from new_grid.dat\n"); exit(-1);
  }

  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (double *) malloc(2*nnode*sizeof(double));
  q      = (double *) malloc(4*ncell*sizeof(double));
  qold   = (double *) malloc(4*ncell*sizeof(double));
  res    = (double *) malloc(4*ncell*sizeof(double));
  adt    = (double *) malloc(  ncell*sizeof(double));

  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
                                   &cell[4*n+2], &cell[4*n+3]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
                                   &ecell[2*n],&ecell[2*n+1]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
                                   &becell[n], &bound[n]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
        q[4*n+m] = qinf[m];
      res[4*n+m] = 0.0f;
    }
  }

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"double",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"double",q    ,"p_q");
  //op_dat p_qold  = op_decl_dat(cells ,4,"double",qold ,"p_qold");
  //op_dat p_adt   = op_decl_dat(cells ,1,"double",adt  ,"p_adt");
  //op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  // p_res, p_adt and p_qold  now declared as a temp op_dats during
  // the execution of the time-marching loop

  op_decl_const2("gam",1,"double",&gam);
  op_decl_const2("gm1",1,"double",&gm1);
  op_decl_const2("cfl",1,"double",&cfl);
  op_decl_const2("eps",1,"double",&eps);
  op_decl_const2("mach",1,"double",&mach);
  op_decl_const2("alpha",1,"double",&alpha);
  op_decl_const2("qinf",4,"double",qinf);

  op_diagnostic_output();

  double g_ncell = op_get_size(cells);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

    double* tmp_elem = NULL;
    op_dat p_res   = op_decl_dat_temp(cells ,4,"double",tmp_elem,"p_res");
    op_dat p_adt   = op_decl_dat_temp(cells ,1,"double",tmp_elem,"p_adt");
    op_dat p_qold  = op_decl_dat_temp(cells ,4,"double",qold ,"p_qold");

    // save old flow solution

    op_par_loop_save_soln("save_soln",cells,
                op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE));

    // predictor/corrector update loop

    for(int k=0; k<2; k++) {

      // calculate area/timstep

      op_par_loop_adt_calc("adt_calc",cells,
                  op_arg_dat(p_x,0,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,2,pcell,2,"double",OP_READ),
                  op_arg_dat(p_x,3,pcell,2,"double",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
                  op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE));

      // calculate flux residual

      op_par_loop_res_calc("res_calc",edges,
                  op_arg_dat(p_x,0,pedge,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pedge,2,"double",OP_READ),
                  op_arg_dat(p_q,0,pecell,4,"double",OP_READ),
                  op_arg_dat(p_q,1,pecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,0,pecell,1,"double",OP_READ),
                  op_arg_dat(p_adt,1,pecell,1,"double",OP_READ),
                  op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
                  op_arg_dat(p_res,1,pecell,4,"double",OP_INC));

      op_par_loop_bres_calc("bres_calc",bedges,
                  op_arg_dat(p_x,0,pbedge,2,"double",OP_READ),
                  op_arg_dat(p_x,1,pbedge,2,"double",OP_READ),
                  op_arg_dat(p_q,0,pbecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ),
                  op_arg_dat(p_res,0,pbecell,4,"double",OP_INC),
                  op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ));

      // update flow field

      rms = 0.0;

      op_par_loop_update("update",cells,
                  op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ),
                  op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE),
                  op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
                  op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ),
                  op_arg_gbl(&rms,1,"double",OP_INC));
    }

    // print iteration history
    rms = sqrt(rms/(double)g_ncell );
    if (iter%100 == 0)
      op_printf(" %d  %10.5e \n",iter,rms);

    if (iter%1000 == 0 && g_ncell == 720000){ //defailt mesh -- for validation testing
      //op_printf(" %d  %3.16f \n",iter,rms);
      double diff=fabs((100.0*(rms/0.0001060114637578))-100.0);
      op_printf("\n\nTest problem with %d cells is within %3.15E %% of the expected solution\n",720000, diff);
      if(diff < 0.00001) {
        op_printf("This test is considered PASSED\n");
      }
      else {
        op_printf("This test is considered FAILED\n");
      }
    }

    if (op_free_dat_temp(p_res) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_res->name);
    if (op_free_dat_temp(p_adt) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_adt->name);
    if (op_free_dat_temp(p_qold) < 0)
      op_printf("Error: temporary op_dat %s cannot be removed\n",p_qold->name);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);

  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
Example #9
0
/*
 * Jacobi-style iteration on an (NN-1) x (NN-1) grid of interior nodes, using
 * datasets declared with more than one component per element.
 *
 * Builds the edge list, matrix entries and right-hand side on the host,
 * declares them to OP, runs NITER iterations of the "res" and "update"
 * loops, then prints the first component of u as a grid.
 */
int main(int argc, char **argv){

  int   nnode, nedge, n, e;
  float dx;

  nnode = (NN-1)*(NN-1);
  nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2);
  dx    = 1.0f / ((float) NN);

  int    *pp = (int *)malloc(sizeof(int)*2*nedge);
  int    *p1 = (int *)malloc(sizeof(int)*nedge);
  int    *p2 = (int *)malloc(sizeof(int)*nedge);

  float  *xe = (float *)malloc(sizeof(float)*2*nedge);
  float  *xn = (float *)malloc(sizeof(float)*2*nnode);

  double *A  = (double *)malloc(sizeof(double)*3*nedge);
  float  *r  = (float *)malloc(sizeof(float)*2*nnode);
  float  *u  = (float *)malloc(sizeof(float)*2*nnode);
  float  *du = (float *)malloc(sizeof(float)*3*nnode);

  // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning
  //
  // Only the first component of each r, u, du and A entry is written here;
  // the remaining components keep whatever malloc returned.
  // NOTE(review): confirm the kernels never read those components.

  e = 0;

  for (int i=1; i<NN; i++) {
    for (int j=1; j<NN; j++) {
      n         = i-1 + (j-1)*(NN-1);
      r[2*n]      = 0.0f;
      u[2*n]      = 0.0f;
      du[3*n]     = 0.0f;
      xn[2*n  ] = i*dx;
      xn[2*n+1] = j*dx;

      // diagonal entry: an edge from the node to itself
      p1[e]     = n;
      p2[e]     = n;
      pp[2*e]   = p1[e];
      pp[2*e+1] = p2[e];
      A[3*e]      = -1.0f;
      xe[2*e  ] = i*dx;
      xe[2*e+1] = j*dx;
      e++;

      // the four neighbours: left, right, below, above
      for (int pass=0; pass<4; pass++) {
        int i2 = i;
        int j2 = j;
        if (pass==0) i2 += -1;
        if (pass==1) i2 +=  1;
        if (pass==2) j2 += -1;
        if (pass==3) j2 +=  1;

        if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) {
          // neighbour lies on the boundary: contributes to the r.h.s. instead
          r[2*n] += 0.25f;
        }
        else {
          p1[e]     = n;
          p2[e]     = i2-1 + (j2-1)*(NN-1);
          pp[2*e]   = p1[e];
          pp[2*e+1] = p2[e];
          A[3*e]      = 0.25f;
          xe[2*e  ] = i*dx;
          xe[2*e+1] = j*dx;
          e++;
        }
      }
    }
  }

  // OP initialisation

  op_init(argc,argv,5);

  // declare sets, pointers, and datasets

  op_set nodes = op_decl_set(nnode, "nodes");
  op_set edges = op_decl_set(nedge, "edges");

  op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge");

  op_dat p_A  = op_decl_dat(edges,3,"double",A,  "p_A" );
  op_dat p_r  = op_decl_dat(nodes,2,"float", r,  "p_r" );
  op_dat p_u  = op_decl_dat(nodes,2,"float", u,  "p_u" );
  op_dat p_du = op_decl_dat(nodes,3,"float", du, "p_du");

  // alpha is declared twice with different values; 1.0f is the one in effect
  // after the second call.
  // NOTE(review): presumably this exercises re-declaring a constant — confirm.
  alpha = 2.0f;
  op_decl_const(1,"float",&alpha);
  alpha = 1.0f;
  op_decl_const(1,"float",&alpha);

  op_diagnostic_output();

  // main iteration loop

  float u_sum, u_max, beta = 1.0f;

  for (int iter=0; iter<NITER; iter++) {
    op_par_loop(res,"res", edges,
                op_arg_dat(p_A, -1,OP_ID,  3,"double",OP_READ),
                op_arg_dat(p_u,  1,ppedge, 2,"float", OP_READ),
                op_arg_dat(p_du, 0,ppedge, 3,"float", OP_INC ),
                op_arg_gbl(&beta,1,"float",OP_READ));

    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop(update,"update", nodes,
                op_arg_dat(p_r,  -1,OP_ID, 2,"float",OP_READ),
                op_arg_dat(p_du, -1,OP_ID, 3,"float",OP_RW  ),
                op_arg_dat(p_u,  -1,OP_ID, 2,"float",OP_INC ),
                op_arg_gbl(&u_sum,1,"float",OP_INC),
                op_arg_gbl(&u_max,1,"float",OP_MAX));
    printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/nnode));
  }

  // print out results

  printf("\n  Results after %d iterations:\n\n",NITER);

  op_fetch_data(p_u);
  /*
  op_fetch_data(p_du);
  op_fetch_data(p_r);
  */

  // Only pass 0 (array u) is printed. The du and r branches are kept for
  // debugging and become reachable if the pass bound is raised and the
  // matching op_fetch_data calls above are enabled.
  for (int pass=0; pass<1; pass++) {
    /*
    if(pass==0)      printf("\narray u\n");
    else if(pass==1) printf("\narray du\n");
    else if(pass==2) printf("\narray r\n");
    */

    for (int j=NN-1; j>0; j--) {
      for (int i=1; i<NN; i++) {
        if (pass==0)
          printf(" %7.4f",u[2*(i-1 + (j-1)*(NN-1))]);
        else if (pass==1)
          // du holds 3 components per node, so the node index is scaled by 3
          printf(" %7.4f",du[3*(i-1 + (j-1)*(NN-1))]);
        else if (pass==2)
          printf(" %7.4f",r[2*(i-1 + (j-1)*(NN-1))]);
      }
      printf("\n");
    }
    printf("\n");
  }

  op_timing_output();

  op_exit();

  // free allocated arrays

  free(pp);
  free(A);
  free(r);
  free(u);
  free(du);

  // These four are never handed to OP, but were previously never released.
  free(p1);
  free(p2);
  free(xe);
  free(xn);
}
Example #10
0
/*
 * Finite-element potential-flow driver with an inner conjugate-gradient
 * solve, MPI variant.
 *
 * Every rank opens FE_grid.dat and reads the header; only the rank equal to
 * MPI_ROOT reads the remaining data, which is then distributed with the
 * scatter_* helpers. The outer loop runs 20 iterations; each assembles the
 * residual and stiffness data, applies the boundary-node loop, runs the CG
 * loop until the residual norm has dropped to 10% of its starting value or
 * 200 inner iterations have been done, and then updates phim.
 */
int main(int argc, char **argv)
{
  // OP initialisation

  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // The global (g_*) arrays are only allocated on the root rank. They start
  // as NULL so that the other ranks hand a well-defined pointer value to the
  // scatter helpers instead of an uninitialised one.
  int    *bnode, *cell, *g_bnode = NULL, *g_cell = NULL;
  double  *xm, *g_xm = NULL;

  int    nnode,ncell,nbnodes,niter, g_nnode, g_ncell, g_nbnodes;
  double  rms = 1;

  // read in grid

  op_printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("FE_grid.dat","r")) == NULL) {
    op_printf("can't open file FE_grid.dat\n"); exit(-1);
  }

  // header: global node, cell and boundary-node counts (read on every rank)
  if (fscanf(fp,"%d %d %d \n",&g_nnode, &g_ncell, &g_nbnodes) != 3) {
    op_printf("error reading from FE_grid.dat\n"); exit(-1);
  }

  if (my_rank == MPI_ROOT) {
    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_bnode   = (int *) malloc(g_nbnodes*sizeof(int));
    g_xm      = (double *) malloc(2*g_nnode*sizeof(double));

    // two coordinates per node
    for (int n=0; n<g_nnode; n++) {
      if (fscanf(fp,"%lf %lf \n",&g_xm[2*n], &g_xm[2*n+1]) != 2) {
        op_printf("error reading from FE_grid.dat\n"); exit(-1);
      }
    }

    // four node indices per cell
    for (int n=0; n<g_ncell; n++) {
      if (fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
      &g_cell[4*n+2], &g_cell[4*n+3]) != 4) {
        op_printf("error reading from FE_grid.dat\n"); exit(-1);
      }
    }

    // one node index per boundary node
    for (int n=0; n<g_nbnodes; n++) {
      if (fscanf(fp,"%d \n",&g_bnode[n]) != 1) {
        op_printf("error reading from FE_grid.dat\n"); exit(-1);
      }
    }
  }
  fclose(fp);

  // local sizes for this rank, then distribute the global arrays
  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  ncell = compute_local_size (g_ncell, comm_size, my_rank);
  nbnodes = compute_local_size (g_nbnodes, comm_size, my_rank);

  cell   = (int *) malloc(4*ncell*sizeof(int));
  bnode   = (int *) malloc(nbnodes*sizeof(int));
  xm      = (double *) malloc(2*nnode*sizeof(double));

  scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
  scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes,nbnodes, 1);
  scatter_double_array(g_xm, xm, comm_size, g_nnode,nnode, 2);

  if(my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_xm);
    free(g_bnode);
  }

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  double gam = 1.4;
  gm1 = gam - 1.0;
  gm1i = 1.0/gm1;

  wtg1[0] = 0.5;
  wtg1[1] = 0.5;
  xi1[0] = 0.211324865405187;
  xi1[1] = 0.788675134594813;
  Ng1[0] = 0.788675134594813;
  Ng1[1] = 0.211324865405187;
  Ng1[2] = 0.211324865405187;
  Ng1[3] = 0.788675134594813;
  Ng1_xi[0] = -1;
  Ng1_xi[1] = -1;
  Ng1_xi[2] = 1;
  Ng1_xi[3] = 1;
  wtg2[0] = 0.25;
  wtg2[1] = 0.25;
  wtg2[2] = 0.25;
  wtg2[3] = 0.25;
  Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520;
  Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667;
  Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667;
  Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146;
  Ng2_xi[0] = -0.788675134594813;  Ng2_xi[1] = 0.788675134594813;  Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187;
  Ng2_xi[4] = -0.788675134594813;  Ng2_xi[5] = 0.788675134594813;  Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187;
  Ng2_xi[8] = -0.211324865405187;  Ng2_xi[9] = 0.211324865405187;  Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813;
  Ng2_xi[12] = -0.211324865405187;  Ng2_xi[13] = 0.211324865405187;  Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813;
  Ng2_xi[16] = -0.788675134594813;  Ng2_xi[17] = -0.211324865405187;  Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187;
  Ng2_xi[20] = -0.211324865405187;  Ng2_xi[21] = -0.788675134594813;  Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813;
  Ng2_xi[24] = -0.788675134594813;  Ng2_xi[25] = -0.211324865405187;  Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187;
  Ng2_xi[28] = -0.211324865405187;  Ng2_xi[29] = -0.788675134594813;  Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813;

  minf = 0.1;
  m2 = minf*minf;
  freq = 1;
  kappa = 1;
  nmode = 0;

  mfan = 1.0;

  // initial potential: minf times the first coordinate of each local node
  double *phim = (double *)malloc(nnode*sizeof(double));
  memset(phim,0,nnode*sizeof(double));
  for (int i = 0;i<nnode;i++) {
    phim[i] = minf*xm[2*i];
  }

  // per-cell 4x4 block and per-node work vectors, all zero-filled
  double *K = (double *)malloc(4*4*ncell*sizeof(double));
  memset(K,0,4*4*ncell*sizeof(double));
  double *resm = (double *)malloc(nnode*sizeof(double));
  memset(resm,0,nnode*sizeof(double));

  double *V = (double *)malloc(nnode*sizeof(double));
  memset(V,0,nnode*sizeof(double));
  double *P = (double *)malloc(nnode*sizeof(double));
  memset(P,0,nnode*sizeof(double));
  double *U = (double *)malloc(nnode*sizeof(double));
  memset(U,0,nnode*sizeof(double));

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set bnodes = op_decl_set(nbnodes, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pbnodes  = op_decl_map(bnodes,nodes,1,bnode, "pbedge");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_xm     = op_decl_dat(nodes ,2,"double",xm    ,"p_x");
  op_dat p_phim  = op_decl_dat(nodes, 1, "double", phim, "p_phim");
  op_dat p_resm  = op_decl_dat(nodes, 1, "double", resm, "p_resm");
  op_dat p_K  = op_decl_dat(cells, 16, "double:soa", K, "p_K");

  op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V");
  op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P");
  op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U");

  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&gm1i  );
  op_decl_const(1,"double",&m2  );
  op_decl_const(2,"double",wtg1  );
  op_decl_const(2,"double",xi1  );
  op_decl_const(4,"double",Ng1  );
  op_decl_const(4,"double",Ng1_xi  );
  op_decl_const(4,"double",wtg2  );
  op_decl_const(16,"double",Ng2  );
  op_decl_const(32,"double",Ng2_xi  );
  op_decl_const(1,"double",&minf  );
  op_decl_const(1,"double",&freq  );
  op_decl_const(1,"double",&kappa  );
  op_decl_const(1,"double",&nmode  );
  op_decl_const(1,"double",&mfan  );

  op_diagnostic_output();

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL);

  // main time-marching loop

  niter = 20;
  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);
  for(int iter=1; iter<=niter; iter++) {

   // assemble per-cell K and accumulate the nodal residual
   op_par_loop(res_calc,"res_calc",cells,
                op_arg_dat(p_xm,    -4, pcell, 2,"double",OP_READ),
                op_arg_dat(p_phim,  -4, pcell, 1,"double",OP_READ),
                op_arg_dat(p_K,     -1,     OP_ID, 16,"double:soa",OP_WRITE),
                op_arg_dat(p_resm,  -4, pcell, 1,"double",OP_INC)
                );

    // overwrite the residual on boundary nodes
    op_par_loop(dirichlet,"dirichlet",bnodes,
                op_arg_dat(p_resm,  0, pbnodes, 1,"double",OP_WRITE));

    double c1 = 0;
    double c2 = 0;
    double c3 = 0;
    double alpha = 0;
    double beta = 0;

    //c1 = R'*R;
    op_par_loop(init_cg, "init_cg", nodes,
                op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                op_arg_gbl(&c1, 1, "double", OP_INC),
                op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE),
                op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE),
                op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE));

    //set up stopping conditions
    double res0 = sqrt(c1);
    double res = res0;
    int inner_iter = 0;
    int maxiter = 200;
    while (res > 0.1*res0 && inner_iter < maxiter) {
      //V = Stiffness*P
      op_par_loop(spMV, "spMV", cells,
                  op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC),
                  op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ),
                  op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ));

      op_par_loop(dirichlet,"dirichlet",bnodes,
                  op_arg_dat(p_V,  0, pbnodes, 1,"double",OP_WRITE));

      c2 = 0;

      //c2 = P'*V;
      op_par_loop(dotPV, "dotPV", nodes,
                  op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_gbl(&c2, 1, "double", OP_INC));

      alpha = c1/c2;

      //U = U + alpha*P;
      //resm = resm-alpha*V;
      op_par_loop(updateUR, "updateUR", nodes,
                  op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC),
                  op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC),
                  op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW),
                  op_arg_gbl(&alpha, 1, "double", OP_READ));

      c3 = 0;

      //c3 = resm'*resm;
      op_par_loop(dotR, "dotR", nodes,
                  op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_gbl(&c3, 1, "double", OP_INC));
      beta = c3/c1;
      //P = beta*P+resm;
      op_par_loop(updateP, "updateP", nodes,
                  op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                  op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW),
                  op_arg_gbl(&beta, 1, "double", OP_READ));
      c1 = c3;
      res = sqrt(c1);
      inner_iter++;
    }
    rms = 0;
    //phim = phim - Stiffness\Load;
    op_par_loop(update, "update", nodes,
                op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW),
                op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE),
                op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ),
                op_arg_gbl(&rms, 1, "double", OP_INC));
    op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(g_nnode), inner_iter);
  }
  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();

  // NOTE(review): the host-side frees below are disabled in the original and
  // are left that way here; presumably the library takes over these buffers
  // once they are declared/partitioned — confirm before re-enabling.
  /*free(cell);
  free(bnode);
  free(xm);
  free(phim);
  free(K);
  free(resm);
  free(V);
  free(P);
  free(U);*/
}
int main(int argc, char *argv[]){

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  REAL  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  REAL  rms;

  if (argc != 2) {
    printf("Usage: airfoil <grid>\n");
    exit(1);
  }

  // read in grid

  printf("reading in grid \n");
  char* grid = argv[1];

  FILE *fp;
  if ( (fp = fopen(grid,"r")) == NULL) {
    printf("can't open file %s\n", grid); exit(-1);
  }

  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) {
    printf("error reading from %s\n", grid); exit(-1);
  }

  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (REAL *) malloc(2*nnode*sizeof(REAL));
  q      = (REAL *) malloc(4*ncell*sizeof(REAL));
  qold   = (REAL *) malloc(4*ncell*sizeof(REAL));
  res    = (REAL *) malloc(4*ncell*sizeof(REAL));
  adt    = (REAL *) malloc(  ncell*sizeof(REAL));

  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
                                   &cell[4*n+2], &cell[4*n+3]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
                                   &ecell[2*n],&ecell[2*n+1]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
                                   &becell[n], &bound[n]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // set constants and initialise flow field and residual

  printf("initialising flow field \n");

  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  REAL mach  = 0.4f;
  REAL alpha = 3.0f*atan(1.0f)/45.0f;  
  REAL p     = 1.0f;
  REAL r     = 1.0f;
  REAL u     = sqrt(gam*p/r)*mach;
  REAL e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
        q[4*n+m] = qinf[m];
      res[4*n+m] = 0.0f;
    }
  }

  // OP initialisation

  op_init(argc,argv,2);

  op_tuner* global_tuner = op_create_global_tuner();  
  global_tuner->op_warpsize = 1;
  global_tuner->block_size = 64;
  global_tuner->part_size = 128;
  global_tuner->cache_line_size = 128;

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,REAL_STRING,x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,REAL_STRING,q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,REAL_STRING,qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,REAL_STRING,adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,REAL_STRING,res  ,"p_res");

  op_decl_const(1,REAL_STRING,&gam  );
  op_decl_const(1,REAL_STRING,&gm1  );
  op_decl_const(1,REAL_STRING,&cfl  );
  op_decl_const(1,REAL_STRING,&eps  );
  op_decl_const(1,REAL_STRING,&mach );
  op_decl_const(1,REAL_STRING,&alpha);
  op_decl_const(4,REAL_STRING,qinf  );

  op_tuner* save_soln_tuner = op_create_tuner("save_soln");
  save_soln_tuner->part_size = 64;
  save_soln_tuner->block_size = 4;

  op_tuner* adt_calc_tuner = op_create_tuner("adt_calc");
  adt_calc_tuner->part_size = 64;
  adt_calc_tuner->block_size = 4;

  op_tuner* res_calc_tuner = op_create_tuner("res_calc");
  res_calc_tuner->part_size = 64;
  res_calc_tuner->block_size = 4;

  op_tuner* bres_calc_tuner = op_create_tuner("bres_calc");
  bres_calc_tuner->part_size = 64;
  bres_calc_tuner->block_size = 4;
 
  op_tuner* update_tuner = op_create_tuner("update");
  update_tuner->part_size = 64;
  update_tuner->block_size = 4;

    op_diagnostic_output();

// main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

//  save old flow solution

    op_par_loop(save_soln,"save_soln", cells,
                op_arg_dat(p_q,   -1,OP_ID, 4,REAL_STRING,OP_READ ),
                op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_WRITE),
                save_soln_tuner);

//  predictor/corrector update loop

    for(int k=0; k<2; k++) {

//    calculate area/timstep

      op_par_loop(adt_calc,"adt_calc",cells,
                  op_arg_dat(p_x,   0,pcell, 2,REAL_STRING,OP_READ ),
                  op_arg_dat(p_x,   1,pcell, 2,REAL_STRING,OP_READ ),
                  op_arg_dat(p_x,   2,pcell, 2,REAL_STRING,OP_READ ),
                  op_arg_dat(p_x,   3,pcell, 2,REAL_STRING,OP_READ ),
                  op_arg_dat(p_q,  -1,OP_ID, 4,REAL_STRING,OP_READ ),
                  op_arg_dat(p_adt,-1,OP_ID, 1,REAL_STRING,OP_WRITE),
                  adt_calc_tuner);

//    calculate flux residual

      op_par_loop(res_calc,"res_calc",edges,
                  op_arg_dat(p_x,    0,pedge, 2,REAL_STRING,OP_READ),
                  op_arg_dat(p_x,    1,pedge, 2,REAL_STRING,OP_READ),
                  op_arg_dat(p_q,    0,pecell,4,REAL_STRING,OP_READ),
                  op_arg_dat(p_q,    1,pecell,4,REAL_STRING,OP_READ),
                  op_arg_dat(p_adt,  0,pecell,1,REAL_STRING,OP_READ),
                  op_arg_dat(p_adt,  1,pecell,1,REAL_STRING,OP_READ),
                  op_arg_dat(p_res,  0,pecell,4,REAL_STRING,OP_INC ),
                  op_arg_dat(p_res,  1,pecell,4,REAL_STRING,OP_INC ),
                  res_calc_tuner);

      op_par_loop(bres_calc,"bres_calc",bedges,
                  op_arg_dat(p_x,     0,pbedge, 2,REAL_STRING,OP_READ),
                  op_arg_dat(p_x,     1,pbedge, 2,REAL_STRING,OP_READ),
                  op_arg_dat(p_q,     0,pbecell,4,REAL_STRING,OP_READ),
                  op_arg_dat(p_adt,   0,pbecell,1,REAL_STRING,OP_READ),
                  op_arg_dat(p_res,   0,pbecell,4,REAL_STRING,OP_INC ),
                  op_arg_dat(p_bound,-1,OP_ID  ,1,"int",  OP_READ),
                  bres_calc_tuner);

//    update flow field

      rms = 0.0;

      op_par_loop(update,"update",cells,
                  op_arg_dat(p_qold,-1,OP_ID, 4,REAL_STRING,OP_READ ),
                  op_arg_dat(p_q,   -1,OP_ID, 4,REAL_STRING,OP_WRITE),
                  op_arg_dat(p_res, -1,OP_ID, 4,REAL_STRING,OP_RW   ),
                  op_arg_dat(p_adt, -1,OP_ID, 1,REAL_STRING,OP_READ ),
                  op_arg_gbl(&rms,1,REAL_STRING,OP_INC),
                  update_tuner);
    }

//  print iteration history

    rms = sqrt(rms/(REAL) ncell);

    if ( iter % 100 == 0 )
      printf(" %d  %10.5e \n",iter,rms);
  }

  for ( int ll = 0; ll < 4*ncell; ll++ ) {
    printf ( "%lf\n", q[ll] );
  }

  op_timing_output();
}
Example #12
0
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  float  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge;

  /**------------------------BEGIN  I/O -------------------**/

  char file[] = "new_grid.dat";
  char file_out[] = "new_grid_out.h5";

  /* read in grid from disk on root processor */
  FILE *fp;

  if ( (fp = fopen(file,"r")) == NULL) {
    op_printf("can't open file %s\n",file); exit(-1);
  }

  int   g_nnode,g_ncell,g_nedge,g_nbedge;

  check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants

  op_printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  float mach  = 0.4f;
  float alpha = 3.0f*atan(1.0f)/45.0f;
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(gam*p/r)*mach;
  float e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n"
      ,g_nnode,g_ncell,g_nedge,g_nbedge);

  if(my_rank == MPI_ROOT) {
    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_edge   = (int *) malloc(2*g_nedge*sizeof(int));
    g_ecell  = (int *) malloc(2*g_nedge*sizeof(int));
    g_bedge  = (int *) malloc(2*g_nbedge*sizeof(int));
    g_becell = (int *) malloc(  g_nbedge*sizeof(int));
    g_bound  = (int *) malloc(  g_nbedge*sizeof(int));

    g_x      = (float *) malloc(2*g_nnode*sizeof(float));
    g_q      = (float *) malloc(4*g_ncell*sizeof(float));
    g_qold   = (float *) malloc(4*g_ncell*sizeof(float));
    g_res    = (float *) malloc(4*g_ncell*sizeof(float));
    g_adt    = (float *) malloc(  g_ncell*sizeof(float));

    for (int n=0; n<g_nnode; n++){
      check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2);
    }

    for (int n=0; n<g_ncell; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
            &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    for (int n=0; n<g_nedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1],
            &g_ecell[2*n],&g_ecell[2*n+1]), 4);
    }

    for (int n=0; n<g_nbedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1],
            &g_becell[n],&g_bound[n]), 4);
    }

    //initialise flow field and residual

    for (int n=0; n<g_ncell; n++) {
      for (int m=0; m<4; m++) {
        g_q[4*n+m] = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  ncell = compute_local_size (g_ncell, comm_size, my_rank);
  nedge = compute_local_size (g_nedge, comm_size, my_rank);
  nbedge = compute_local_size (g_nbedge, comm_size, my_rank);

  op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n"
      ,my_rank,nnode,ncell,nedge,nbedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (float *) malloc(2*nnode*sizeof(float));
  q      = (float *) malloc(4*ncell*sizeof(float));
  qold   = (float *) malloc(4*ncell*sizeof(float));
  res    = (float *) malloc(4*ncell*sizeof(float));
  adt    = (float *) malloc(  ncell*sizeof(float));

  /* scatter sets, mappings and data on sets*/
  scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
  scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1);
  scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1);

  scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2);
  scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1);

  /*Freeing memory allocated to gloabal arrays on rank 0
    after scattering to all processes*/
  if(my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x );
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  /**------------------------END I/O  -----------------------**/

  /* FIXME: It's not clear to the compiler that sth. is going on behind the
     scenes here. Hence theses variables are reported as unused */

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"float",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"float",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"float",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"float",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"float",res  ,"p_res");

  op_decl_const(1,"float",&gam  );
  op_decl_const(1,"float",&gm1  );
  op_decl_const(1,"float",&cfl  );
  op_decl_const(1,"float",&eps  );
  op_decl_const(1,"float",&mach );
  op_decl_const(1,"float",&alpha);
  op_decl_const(4,"float",qinf  );

  op_dump_to_hdf5(file_out);
  op_write_const_hdf5("gam",  1,"float",(char *)&gam,  "new_grid_out.h5");
  op_write_const_hdf5("gm1",  1,"float",(char *)&gm1,  "new_grid_out.h5");
  op_write_const_hdf5("cfl",  1,"float",(char *)&cfl,  "new_grid_out.h5");
  op_write_const_hdf5("eps",  1,"float",(char *)&eps,  "new_grid_out.h5");
  op_write_const_hdf5("mach", 1,"float",(char *)&mach, "new_grid_out.h5");
  op_write_const_hdf5("alpha",1,"float",(char *)&alpha,"new_grid_out.h5");
  op_write_const_hdf5("qinf", 4,"float",(char *)qinf,  "new_grid_out.h5");

  //create halos - for sanity check
  op_halo_create();

  op_exit();
}
Example #13
0
/*
 * Airfoil example driver.
 *
 * Reads the grid from ./new_grid.dat, initialises the flow field to the
 * free-stream state qinf, declares the OP sets, maps, dats and constants,
 * then runs 1000 iterations of the save_soln / adt_calc / res_calc /
 * bres_calc / update loop sequence, printing the rms value every 100
 * iterations. Finally p_q is written to out_grid_seq.dat (text) and
 * out_grid_seq.bin (binary) and the wall-clock time of the loop is printed.
 *
 * gam, gm1, cfl, eps and qinf are not declared in this function; they are
 * expected to be globals defined elsewhere in the file.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  double  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  double  rms;

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // read in grid

  op_printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("./new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  // a negative size would wrap around in the allocation sizes below, so it
  // is treated like any other malformed header
  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4 ||
      nnode < 0 || ncell < 0 || nedge < 0 || nbedge < 0) {
    op_printf("error reading from new_grid.dat\n"); exit(-1);
  }

  // sizeof comes first so the products are evaluated in size_t, not int
  cell   = (int *) malloc(sizeof(int)*4*ncell);
  edge   = (int *) malloc(sizeof(int)*2*nedge);
  ecell  = (int *) malloc(sizeof(int)*2*nedge);
  bedge  = (int *) malloc(sizeof(int)*2*nbedge);
  becell = (int *) malloc(sizeof(int)*nbedge);
  bound  = (int *) malloc(sizeof(int)*nbedge);

  x      = (double *) malloc(sizeof(double)*2*nnode);
  q      = (double *) malloc(sizeof(double)*4*ncell);
  qold   = (double *) malloc(sizeof(double)*4*ncell);
  res    = (double *) malloc(sizeof(double)*4*ncell);
  adt    = (double *) malloc(sizeof(double)*ncell);

  // malloc(0) may legitimately return NULL, so a NULL pointer only counts
  // as a failure when the corresponding set is non-empty
  if ((ncell  > 0 && (!cell || !q || !qold || !res || !adt)) ||
      (nedge  > 0 && (!edge || !ecell)) ||
      (nbedge > 0 && (!bedge || !becell || !bound)) ||
      (nnode  > 0 && !x)) {
    op_printf("error allocating memory for grid\n"); exit(-1);
  }

  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
                                   &cell[4*n+2], &cell[4*n+3]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
                                   &ecell[2*n],&ecell[2*n+1]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
                                   &becell[n], &bound[n]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  // qold and adt are left unset here: the loops below access them with
  // OP_WRITE (save_soln, adt_calc) before any loop reads them
  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
        q[4*n+m] = qinf[m];
      res[4*n+m] = 0.0f;
    }
  }

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"double",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"double",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"double",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"double",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&cfl  );
  op_decl_const(1,"double",&eps  );
  op_decl_const(1,"double",&mach );
  op_decl_const(1,"double",&alpha);
  op_decl_const(4,"double",qinf  );

  op_diagnostic_output();

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 1000;

  for(int iter=1; iter<=niter; iter++) {

    // save old flow solution

    op_par_loop(save_soln,"save_soln", cells,
      op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_READ ),
      op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE));

    // predictor/corrector update loop

    for(int k=0; k<2; k++) {

      // calculate area/timstep

      op_par_loop(adt_calc,"adt_calc",cells,
          op_arg_dat(p_x,   0,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   1,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   2,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_x,   3,pcell, 2,"double",OP_READ ),
          op_arg_dat(p_q,  -1,OP_ID, 4,"double",OP_READ ),
          op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE));

      // calculate flux residual

      op_par_loop(res_calc,"res_calc",edges,
          op_arg_dat(p_x,    0,pedge, 2,"double",OP_READ),
          op_arg_dat(p_x,    1,pedge, 2,"double",OP_READ),
          op_arg_dat(p_q,    0,pecell,4,"double",OP_READ),
          op_arg_dat(p_q,    1,pecell,4,"double",OP_READ),
          op_arg_dat(p_adt,  0,pecell,1,"double",OP_READ),
          op_arg_dat(p_adt,  1,pecell,1,"double",OP_READ),
          op_arg_dat(p_res,  0,pecell,4,"double",OP_INC ),
          op_arg_dat(p_res,  1,pecell,4,"double",OP_INC ));

      op_par_loop(bres_calc,"bres_calc",bedges,
          op_arg_dat(p_x,     0,pbedge, 2,"double",OP_READ),
          op_arg_dat(p_x,     1,pbedge, 2,"double",OP_READ),
          op_arg_dat(p_q,     0,pbecell,4,"double",OP_READ),
          op_arg_dat(p_adt,   0,pbecell,1,"double",OP_READ),
          op_arg_dat(p_res,   0,pbecell,4,"double",OP_INC ),
          op_arg_dat(p_bound,-1,OP_ID  ,1,"int",  OP_READ));

      // update flow field; rms is reset on every pass, so the value used
      // after this inner loop comes from the second (k == 1) pass only

      rms = 0.0;

      op_par_loop(update,"update",cells,
          op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ),
          op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_WRITE),
          op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW   ),
          op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ),
          op_arg_gbl(&rms,1,"double",OP_INC));
    }

    // print iteration history
    rms = sqrt(rms/(double) op_get_size(cells));
    if (iter%100 == 0)
      op_printf(" %d  %10.5e \n",iter,rms);
  }

  op_timers(&cpu_t2, &wall_t2);

  //output the result dat array to files
  op_print_dat_to_txtfile(p_q, "out_grid_seq.dat"); //ASCI
  op_print_dat_to_binfile(p_q, "out_grid_seq.bin"); //Binary

  op_timing_output();
  op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1);

  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
Example #14
0
/*
 * Reduction test driver.
 *
 * Reads the airfoil grid from ./new_grid.dat, declares the edges and cells
 * sets, the pecell map and the p_res dat, then runs two parallel loops that
 * each increment a global int counter: res_calc over edges (indirect access
 * to p_res through pecell) and update over cells (direct access). The
 * counter is compared against nedge and ncell respectively and PASSED or
 * FAILED is printed for each.
 */
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  double  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge;

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // read in airfoil grid

  op_printf("reading in data \n");

  FILE *fp;
  if ( (fp = fopen("./new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  // a negative size would wrap around in the allocation sizes below, so it
  // is treated like any other malformed header
  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4 ||
      nnode < 0 || ncell < 0 || nedge < 0 || nbedge < 0) {
    op_printf("error reading from new_grid.dat\n"); exit(-1);
  }

  // sizeof comes first so the products are evaluated in size_t, not int
  cell   = (int *) malloc(sizeof(int)*4*ncell);
  edge   = (int *) malloc(sizeof(int)*2*nedge);
  ecell  = (int *) malloc(sizeof(int)*2*nedge);
  bedge  = (int *) malloc(sizeof(int)*2*nbedge);
  becell = (int *) malloc(sizeof(int)*nbedge);
  bound  = (int *) malloc(sizeof(int)*nbedge);

  x      = (double *) malloc(sizeof(double)*2*nnode);
  q      = (double *) malloc(sizeof(double)*4*ncell);
  qold   = (double *) malloc(sizeof(double)*4*ncell);
  res    = (double *) malloc(sizeof(double)*4*ncell);
  adt    = (double *) malloc(sizeof(double)*ncell);

  // malloc(0) may legitimately return NULL, so a NULL pointer only counts
  // as a failure when the corresponding set is non-empty
  if ((ncell  > 0 && (!cell || !q || !qold || !res || !adt)) ||
      (nedge  > 0 && (!edge || !ecell)) ||
      (nbedge > 0 && (!bedge || !becell || !bound)) ||
      (nnode  > 0 && !x)) {
    op_printf("error allocating memory for grid\n"); exit(-1);
  }

  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%lf %lf \n",&x[2*n], &x[2*n+1]) != 2) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
        &cell[4*n+2], &cell[4*n+3]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
        &ecell[2*n],&ecell[2*n+1]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
        &becell[n], &bound[n]) != 4) {
      op_printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

  // p_res is passed to the loops below as OP_INC / OP_RW, so give the
  // backing array defined contents instead of whatever malloc returned
  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
      res[4*n+m] = 0.0;
    }
  }

  // declare sets, pointers, datasets

  op_set edges  = op_decl_set(nedge,  "edges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  int count;

  op_diagnostic_output();

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  //indirect reduction
  count = 0;
  op_par_loop_res_calc("res_calc",edges,
              op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
              op_arg_gbl(&count,1,"int",OP_INC));
  op_printf("number of edges:: %d should be: %d \n",count,nedge);
  if (count != nedge) op_printf("indirect reduction FAILED\n");
  else op_printf("indirect reduction PASSED\n");
  //direct reduction
  count = 0;
  op_par_loop_update("update",cells,
              op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
              op_arg_gbl(&count,1,"int",OP_INC));
  op_printf("number of cells: %d should be: %d \n",count,ncell);
  if (count != ncell) op_printf("direct reduction FAILED\n");
  else op_printf("direct reduction PASSED\n");

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();

  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
int main(int argc, char **argv){

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  float  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  float  rms;

  // read in grid

  printf("reading in grid \n");

  FILE *fp;
  if ( (fp = fopen("/work/rr908/airfoil/new_grid.dat","r")) == NULL) {
    printf("can't open file new_grid.dat\n"); exit(-1);
  }

  if (fscanf(fp,"%d %d %d %d \n",&nnode, &ncell, &nedge, &nbedge) != 4) {
    printf("error reading from new_grid.dat\n"); exit(-1);
  }

  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (float *) malloc(2*nnode*sizeof(float));
  q      = (float *) malloc(4*ncell*sizeof(float));
  qold   = (float *) malloc(4*ncell*sizeof(float));
  res    = (float *) malloc(4*ncell*sizeof(float));
  adt    = (float *) malloc(  ncell*sizeof(float));

  for (int n=0; n<nnode; n++) {
    if (fscanf(fp,"%f %f \n",&x[2*n], &x[2*n+1]) != 2) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<ncell; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&cell[4*n  ], &cell[4*n+1],
                                   &cell[4*n+2], &cell[4*n+3]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&edge[2*n], &edge[2*n+1],
                                   &ecell[2*n],&ecell[2*n+1]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  for (int n=0; n<nbedge; n++) {
    if (fscanf(fp,"%d %d %d %d \n",&bedge[2*n],&bedge[2*n+1],
                                   &becell[n], &bound[n]) != 4) {
      printf("error reading from new_grid.dat\n"); exit(-1);
    }
  }

  fclose(fp);

#ifdef DIAGNOSTIC
  print_array((float *) x, nnode, "initial_nodes");
  print_array((float *) cell, ncell, "initial_cells");
 
  FILE *flog;
  flog = fopen( "initial_cells_cellarray", "w" );
  for( int i=0; i< ncell; ++i ) {
    fprintf( flog, "%d %d %d %d\n", cell[4*i], cell[4*i+1], cell[4*i+2], cell[4*i+3] );
  }
  fclose( flog );


  print_array((float *) edge, nedge, "initial_edges");
  print_array((float *) ecell, nedge, "initiall_edges_for_cell");
  print_array((float *) bedge, nbedge, "initial_border_edges");
  print_array((float *) becell, nbedge, "initial_becell");
  print_array((float *) bound, nbedge, "initial bound");
#endif

  // set constants and initialise flow field and residual

  printf("initialising flow field \n");

  g_const.gam = 1.4f;
  g_const.gm1 = g_const.gam - 1.0f;
  g_const.cfl = 0.9f;
  g_const.eps = 0.05f;

  g_const.mach  = 0.4f;
  g_const.alpha = 3.0f*atan(1.0f)/45.0f;  
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(g_const.gam*p/r)*g_const.mach;
  float e     = p/(r*g_const.gm1) + 0.5f*u*u;

  g_const.qinf[0] = r;
  g_const.qinf[1] = r*u;
  g_const.qinf[2] = 0.0f;
  g_const.qinf[3] = r*e;

  for (int n=0; n<ncell; n++) {
    for (int m=0; m<4; m++) {
        q[4*n+m] = g_const.qinf[m];
      res[4*n+m] = 0.0f;
    }
  }




  // OP initialisation

  printf("OP initialisation\n");
  op_init(argc,argv,2);
  g_const_d = op_allocate_constant( &g_const, sizeof( struct global_constants ) );

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"float",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"float",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"float",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"float",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"float",res  ,"p_res");

  op_decl_const2("gam",1,"float",&g_const.gam  );
  op_decl_const2("gm1",1,"float",&g_const.gm1  );
  op_decl_const2("cfl",1,"float",&g_const.cfl  );
  op_decl_const2("eps",1,"float",&g_const.eps  );
  op_decl_const2("mach",1,"float",&g_const.mach );
  op_decl_const2("alpha",1,"float",&g_const.alpha);
  op_decl_const2("qinf",4,"float",g_const.qinf  );


  op_diagnostic_output();

#ifdef DIAGNOSTIC
  dump_array(p_bound, "initial_dat_p_bound");
  dump_array(p_x, "initial_dat_p_x");
  dump_array(p_q, "initiall_dat_p_q");
  dump_array(p_qold, "initial_dat_p_qold");
  dump_array(p_adt, "initial_dat_p_adt");
  dump_array(p_res, "initial_dat_res");
#endif

// main time-marching loop
  niter = 1000;
  for(int iter=1; iter<=niter; iter++) {

//  save old flow solution
 // dump_array(p_q, "p_q_iter_before");
 // dump_array(p_qold, "p_q_old_iter_before");

    op_par_loop_save_soln("save_soln", cells,
                op_arg_dat(p_q,   -1,OP_ID, 4,"float",OP_READ ),
                op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE));


//  dump_array(p_q, "p_q_iter_after");
//  dump_array(p_qold, "p_q_old_iter_after");

/*    if ( iter == 1 ) {
      dump_array( p_qold, "p_qold" );
    }
    */

#ifdef DIAGNOSTIC
    if (iter==1) {
      dump_array( p_qold, "p_qold" );
    }
#endif
    //dump_array( p_qold, "p_qold" );
    //op_fetch_data( p_qold );
    //print_array( ( float *) p_qold->data, 4*p_qold->set->size, "p_qold" );
//    print_array( p_q, "p_qold2" );
//    print_array( p_qold, "p_qold" );

    //assert( p_q->data[0] != 0.0f );

//  predictor/corrector update loop

  //  dump_array(p_adt, "p_adt_before");
    for(int k=0; k<2; k++) {
//    calculate area/timstep
      if(k == 0 && iter == 0) {
        printf("Dumping adt before adt_calc execution array");
         op_fetch_data( p_adt );
	 float* array = (float *) p_adt->data;
         long size = p_adt->set->size;
         for(long elem = 0; elem < size; ++elem) {
           printf("%lf",array[elem]);
         }
      }

      op_par_loop_adt_calc("adt_calc",cells,
                  op_arg_dat(p_x,   0,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_x,   1,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_x,   2,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_x,   3,pcell, 2,"float",OP_READ ),
                  op_arg_dat(p_q,  -1,OP_ID, 4,"float",OP_READ ),
                  op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE));
      
      if(k == 0 && iter == 0) {
        printf("Dumping adt after 1x adt_calc execution array");
         op_fetch_data( p_adt );
         float* array = (float *) p_adt->data;
         long size = p_adt->set->size;
         for(long elem = 0; elem < size; ++elem) {
           printf("%lf",array[elem]);
         }
      }


#ifdef DIAGNOSTIC
    if (iter==1 && k==0) {
      dump_array( p_adt, "p_adt0" );
    }
    if (iter==1 && k==1) {
      dump_array( p_adt, "p_adt1" );
    }
#endif
  //  dump_array(p_adt, "p_adt_after");
//    calculate flux residual

      op_par_loop_res_calc("res_calc",edges,
                  op_arg_dat(p_x,    0,pedge, 2,"float",OP_READ),
                  op_arg_dat(p_x,    1,pedge, 2,"float",OP_READ),
                  op_arg_dat(p_q,    0,pecell,4,"float",OP_READ),
                  op_arg_dat(p_q,    1,pecell,4,"float",OP_READ),
                  op_arg_dat(p_adt,  0,pecell,1,"float",OP_READ),
                  op_arg_dat(p_adt,  1,pecell,1,"float",OP_READ),
                  op_arg_dat(p_res,  0,pecell,4,"float",OP_INC ),
                  op_arg_dat(p_res,  1,pecell,4,"float",OP_INC ));

#ifdef DIAGNOSTIC
    if (iter==1 && k==0) {
      dump_array( p_res, "p_res0" );
    }
    if (iter==1 && k==1) {
      dump_array( p_res, "p_res1" );
    }
#endif

      op_par_loop_bres_calc("bres_calc",bedges,
                  op_arg_dat(p_x,     0,pbedge, 2,"float",OP_READ),
                  op_arg_dat(p_x,     1,pbedge, 2,"float",OP_READ),
                  op_arg_dat(p_q,     0,pbecell,4,"float",OP_READ),
                  op_arg_dat(p_adt,   0,pbecell,1,"float",OP_READ),
                  op_arg_dat(p_res,   0,pbecell,4,"float",OP_INC ),
                  op_arg_dat(p_bound,-1,OP_ID  ,1,"int",  OP_READ));

#ifdef DIAGNOSTIC
    if (iter==1 && k==0) {
      dump_array( p_res, "p_res_a0" );
    }
    if (iter==1 && k==0) {
      dump_array( p_res, "p_res_a1" );
    }
#endif
//    update flow field

      rms = 0.0;

      op_par_loop_update("update",cells,
                  op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ),
                  op_arg_dat(p_q,   -1,OP_ID, 4,"float",OP_WRITE),
                  op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW   ),
                  op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ),
                  op_arg_gbl(&rms,1,"float",OP_INC));
    }

#ifdef DIAGNOSTIC
    if (iter==1) {
      dump_array( p_q, "p_q1" );
    }
#endif

//  print iteration history

    rms = sqrt(rms/(float) ncell);

    if (iter%100 == 0)
      printf(" %d  %10.5e \n",iter,rms);



  }

  op_timing_output();

#ifdef DIAGNOSTIC
  dump_array( p_q, "p_q" );
#endif



}
Example #16
0
op_dat op_decl_dat_hdf5(op_set set, int dim, char const *type, char const *file, char const *name)
{
  //create new communicator
  int my_rank, comm_size;
  MPI_Comm_dup(MPI_COMM_WORLD, &OP_MPI_HDF5_WORLD);
  MPI_Comm_rank(OP_MPI_HDF5_WORLD, &my_rank);
  MPI_Comm_size(OP_MPI_HDF5_WORLD, &comm_size);

  //MPI variables
  MPI_Info info  = MPI_INFO_NULL;

  //HDF5 APIs definitions
  hid_t       file_id; //file identifier
  hid_t plist_id;  //property list identifier
  hid_t dset_id; //dataset identifier
  hid_t       dataspace; //data space identifier
  hid_t       memspace; //memory space identifier

  hsize_t count[2]; //hyperslab selection parameters
  hsize_t offset[2];
  hid_t attr;   //attribute identifier

  //Set up file access property list with parallel I/O access
  plist_id = H5Pcreate(H5P_FILE_ACCESS);
  H5Pset_fapl_mpio(plist_id, OP_MPI_HDF5_WORLD, info);

  file_id = H5Fopen(file, H5F_ACC_RDONLY, plist_id );
  H5Pclose(plist_id);


  /*find element size of this dat with available attributes*/
  size_t dat_size = 0;
  //open existing data set
  dset_id = H5Dopen(file_id, name, H5P_DEFAULT);
  //get OID of the attribute
  attr = H5Aopen(dset_id, "size", H5P_DEFAULT);
  //read attribute
  H5Aread(attr,H5T_NATIVE_INT,&dat_size);
  H5Aclose(attr);
  H5Dclose(dset_id);


  /*find dim with available attributes*/
  int dat_dim = 0;
  //open existing data set
  dset_id = H5Dopen(file_id, name, H5P_DEFAULT);
  //get OID of the attribute
  attr = H5Aopen(dset_id, "dim", H5P_DEFAULT);
  //read attribute
  H5Aread(attr,H5T_NATIVE_INT,&dat_dim);
  H5Aclose(attr);
  H5Dclose(dset_id);
  if(dat_dim != dim)
  {
    printf("dat.dim %d in file %s and dim %d do not match\n",dat_dim,file,dim);
    MPI_Abort(OP_MPI_HDF5_WORLD, 2);
  }

  /*find type with available attributes*/
  dataspace= H5Screate(H5S_SCALAR);
  hid_t  atype = H5Tcopy(H5T_C_S1);
  H5Tset_size(atype, 10);
  //open existing data set
  dset_id = H5Dopen(file_id, name, H5P_DEFAULT);
  //get OID of the attribute
  attr = H5Aopen(dset_id, "type", H5P_DEFAULT);
  //read attribute
  char typ[10];
  H5Aread(attr,atype,typ);
  H5Aclose(attr);
  H5Sclose(dataspace);
  H5Dclose(dset_id);
  if(strcmp(typ,type) != 0)
  {
    printf("dat.type %s in file %s and type %s do not match\n",typ,file,type);
    MPI_Abort(OP_MPI_HDF5_WORLD, 2);
  }


  /*read in dat in hyperslabs*/

  //Create the dataset with default properties and close dataspace.
  dset_id = H5Dopen(file_id, name, H5P_DEFAULT);

  //Each process defines dataset in memory and reads from a hyperslab in the file.
  int disp = 0;
  int* sizes = (int *)xmalloc(sizeof(int)*comm_size);
  MPI_Allgather(&(set->size), 1, MPI_INT, sizes, 1, MPI_INT, OP_MPI_HDF5_WORLD);
  for(int i = 0; i<my_rank; i++)disp = disp + sizes[i];
  free(sizes);

  count[0] = set->size;
  count[1] = dim;
  offset[0] = disp;
  offset[1] = 0;
  memspace = H5Screate_simple(2, count, NULL);

  //Select hyperslab in the file.
  dataspace = H5Dget_space(dset_id);
  H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);

  //Create property list for collective dataset write.
  plist_id = H5Pcreate(H5P_DATASET_XFER);
  H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);

  //initialize data buffer and read in data
  char* data = 0;
  if(strcmp(type,"double") == 0)
  {
    data = (char *)xmalloc(set->size*dim*sizeof(double));
    H5Dread(dset_id, H5T_NATIVE_DOUBLE, memspace, dataspace, plist_id, data);

    if(dat_size != dim*sizeof(double))
    {
      printf("dat.size %lu in file %s and %d*sizeof(double) do not match\n",dat_size,file,dim);
      MPI_Abort(OP_MPI_HDF5_WORLD, 2);
    }
    else
      dat_size = sizeof(double);

  }else if(strcmp(type,"float") == 0)
  {
    data = (char *)xmalloc(set->size*dim*sizeof(float));
    H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace, dataspace, plist_id, data);

    if(dat_size != dim*sizeof(float))
    {
      printf("dat.size %lu in file %s and %d*sizeof(float) do not match\n",dat_size,file,dim);
      MPI_Abort(OP_MPI_HDF5_WORLD, 2);
    }
    else
      dat_size = sizeof(float);

  }else if(strcmp(type,"int") == 0)
  {
    data = (char *)xmalloc(set->size*dim*sizeof(int));
    H5Dread(dset_id, H5T_NATIVE_INT, memspace, dataspace, plist_id, data);

    if(dat_size != dim*sizeof(int))
    {
      printf("dat.size %lu in file %s and %d*sizeof(int) do not match\n",dat_size,file,dim);
      MPI_Abort(OP_MPI_HDF5_WORLD, 2);
    }
    else
      dat_size = sizeof(int);
  }else
  {
    printf("unknown type\n");
    MPI_Abort(OP_MPI_HDF5_WORLD, 2);
  }

  H5Pclose(plist_id);
  H5Sclose(memspace);
  H5Sclose(dataspace);
  H5Dclose(dset_id);

  H5Fclose(file_id);
  MPI_Comm_free(&OP_MPI_HDF5_WORLD);

  return op_decl_dat(set, dim, type, dat_size, data, name );
}