Example No. 1
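A complete OP2 MPI airfoil solver: every rank reads the header of new_grid.dat, the root rank reads the bulk grid data and scatters them block-wise, the local pieces are declared as OP2 sets, maps and dats, the mesh is partitioned with PT-Scotch, and 1000 iterations of a two-step predictor/corrector scheme are run, printing the residual every 100 iterations.
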
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  float  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge,niter;
  float  rms;

  /**------------------------BEGIN I/O and PARTITIONING -------------------**/

  op_timers(&cpu_t1, &wall_t1);

  /* read in grid from disk on root processor */
  FILE *fp;

  if ( (fp = fopen("new_grid.dat","r")) == NULL) {
    op_printf("can't open file new_grid.dat\n"); exit(-1);
  }

  int   g_nnode,g_ncell,g_nedge,g_nbedge;

  check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants

  op_printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

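  // free-stream conditions: Mach 0.4 at 3 degrees incidence
  // (alpha = 3*atan(1)/45 = 3*pi/180, i.e. 3 degrees in radians)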
  float mach  = 0.4f;
  float alpha = 3.0f*atan(1.0f)/45.0f;
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(gam*p/r)*mach;
  float e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n"
      ,g_nnode,g_ncell,g_nedge,g_nbedge);

  if(my_rank == MPI_ROOT) {
    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_edge   = (int *) malloc(2*g_nedge*sizeof(int));
    g_ecell  = (int *) malloc(2*g_nedge*sizeof(int));
    g_bedge  = (int *) malloc(2*g_nbedge*sizeof(int));
    g_becell = (int *) malloc(  g_nbedge*sizeof(int));
    g_bound  = (int *) malloc(  g_nbedge*sizeof(int));

    g_x      = (float *) malloc(2*g_nnode*sizeof(float));
    g_q      = (float *) malloc(4*g_ncell*sizeof(float));
    g_qold   = (float *) malloc(4*g_ncell*sizeof(float));
    g_res    = (float *) malloc(4*g_ncell*sizeof(float));
    g_adt    = (float *) malloc(  g_ncell*sizeof(float));

    for (int n=0; n<g_nnode; n++){
      check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2);
    }

    for (int n=0; n<g_ncell; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
            &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    for (int n=0; n<g_nedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1],
            &g_ecell[2*n],&g_ecell[2*n+1]), 4);
    }

    for (int n=0; n<g_nbedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1],
            &g_becell[n],&g_bound[n]), 4);
    }

    //initialise flow field and residual

    for (int n=0; n<g_ncell; n++) {
      for (int m=0; m<4; m++) {
        g_q[4*n+m] = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  ncell = compute_local_size (g_ncell, comm_size, my_rank);
  nedge = compute_local_size (g_nedge, comm_size, my_rank);
  nbedge = compute_local_size (g_nbedge, comm_size, my_rank);

  op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n"
      ,my_rank,nnode,ncell,nedge,nbedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (float *) malloc(2*nnode*sizeof(float));
  q      = (float *) malloc(4*ncell*sizeof(float));
  qold   = (float *) malloc(4*ncell*sizeof(float));
  res    = (float *) malloc(4*ncell*sizeof(float));
  adt    = (float *) malloc(  ncell*sizeof(float));

  /* scatter sets, mappings and data on sets*/
  scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
  scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1);
  scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1);

  scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2);
  scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if(my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x );
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_printf("Max total file read time = %f\n", wall_t2-wall_t1);

  /**------------------------END I/O and PARTITIONING -----------------------**/

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"float",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"float",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"float",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"float",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"float",res  ,"p_res");

  op_decl_const(1,"float",&gam  );
  op_decl_const(1,"float",&gm1  );
  op_decl_const(1,"float",&cfl  );
  op_decl_const(1,"float",&eps  );
  op_decl_const(1,"float",&mach );
  op_decl_const(1,"float",&alpha);
  op_decl_const(4,"float",qinf  );

  op_diagnostic_output();

  //trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", NULL, pecell, p_x);

  //initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  niter = 1000;
  for(int iter=1; iter<=niter; iter++) {

    //save old flow solution
    op_par_loop(save_soln,"save_soln", cells,
        op_arg_dat(p_q,   -1,OP_ID, 4,"float",OP_READ ),
        op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_WRITE));
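
    // Note (not part of this listing): the user kernels save_soln, adt_calc,
    // res_calc, bres_calc and update are defined in separate kernel files.
    // In the standard OP2 airfoil app, save_soln simply copies the four flow
    // variables per cell, roughly:
    //   void save_soln(const float *q, float *qold) {
    //     for (int n = 0; n < 4; n++) qold[n] = q[n];
    //   }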

    //  predictor/corrector update loop

    for(int k=0; k<2; k++) {

      //    calculate area/timestep
      op_par_loop(adt_calc,"adt_calc",cells,
          op_arg_dat(p_x,   0,pcell, 2,"float",OP_READ ),
          op_arg_dat(p_x,   1,pcell, 2,"float",OP_READ ),
          op_arg_dat(p_x,   2,pcell, 2,"float",OP_READ ),
          op_arg_dat(p_x,   3,pcell, 2,"float",OP_READ ),
          op_arg_dat(p_q,  -1,OP_ID, 4,"float",OP_READ ),
          op_arg_dat(p_adt,-1,OP_ID, 1,"float",OP_WRITE));

      //    calculate flux residual
      op_par_loop(res_calc,"res_calc",edges,
          op_arg_dat(p_x,    0,pedge, 2,"float",OP_READ),
          op_arg_dat(p_x,    1,pedge, 2,"float",OP_READ),
          op_arg_dat(p_q,    0,pecell,4,"float",OP_READ),
          op_arg_dat(p_q,    1,pecell,4,"float",OP_READ),
          op_arg_dat(p_adt,  0,pecell,1,"float",OP_READ),
          op_arg_dat(p_adt,  1,pecell,1,"float",OP_READ),
          op_arg_dat(p_res,  0,pecell,4,"float",OP_INC ),
          op_arg_dat(p_res,  1,pecell,4,"float",OP_INC ));

      op_par_loop(bres_calc,"bres_calc",bedges,
          op_arg_dat(p_x,     0,pbedge, 2,"float",OP_READ),
          op_arg_dat(p_x,     1,pbedge, 2,"float",OP_READ),
          op_arg_dat(p_q,     0,pbecell,4,"float",OP_READ),
          op_arg_dat(p_adt,   0,pbecell,1,"float",OP_READ),
          op_arg_dat(p_res,   0,pbecell,4,"float",OP_INC ),
          op_arg_dat(p_bound,-1,OP_ID  ,1,"int",  OP_READ));

      //    update flow field

      rms = 0.0;

      op_par_loop(update,"update",cells,
          op_arg_dat(p_qold,-1,OP_ID, 4,"float",OP_READ ),
          op_arg_dat(p_q,   -1,OP_ID, 4,"float",OP_WRITE),
          op_arg_dat(p_res, -1,OP_ID, 4,"float",OP_RW   ),
          op_arg_dat(p_adt, -1,OP_ID, 1,"float",OP_READ ),
          op_arg_gbl(&rms,1,"float",OP_INC));
    }

    //print iteration history
    rms = sqrt(rms/(float) g_ncell);
    if (iter%100 == 0)
      op_printf("%d  %10.5e \n",iter,rms);
  }

  op_timers(&cpu_t2, &wall_t2);

  //get results data array - perhaps this can later be handled by a temporary dat
  //op_dat temp = op_mpi_get_data(p_q);

  //output the result dat array to files
  //print_dat_tofile(temp, "out_grid.dat"); //ASCII
  //print_dat_tobinfile(temp, "out_grid.bin"); //Binary

  op_timing_output();

  //print total time for niter iterations
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
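
The helper routines used above (check_scan, compute_local_size, scatter_int_array, scatter_float_array) belong to the examples' shared user-I/O utilities and are not shown in this listing; note also that these examples compare my_rank against MPI_ROOT, which they evidently #define as rank 0. A minimal sketch of the two partitioning helpers, written here as an illustration of the assumed behaviour rather than the actual implementation:

// Sketch (assumption): block-partition g_size elements over comm_size ranks,
// spreading the remainder over the lowest-numbered ranks.
static int compute_local_size(int g_size, int comm_size, int my_rank) {
  int local = g_size / comm_size;
  if (my_rank < g_size % comm_size) local++;
  return local;
}

// Sketch (assumption): root scatters comp values per set element to each
// rank's local block with MPI_Scatterv; scatter_int_array is analogous.
static void scatter_float_array(float *g_array, float *l_array, int comm_size,
                                int g_size, int l_size, int comp) {
  int *sendcnts = (int *)malloc(comm_size * sizeof(int));
  int *displs   = (int *)malloc(comm_size * sizeof(int));
  int disp = 0;
  for (int i = 0; i < comm_size; i++) {
    sendcnts[i] = comp * compute_local_size(g_size, comm_size, i);
    displs[i]   = disp;
    disp       += sendcnts[i];
  }
  MPI_Scatterv(g_array, sendcnts, displs, MPI_FLOAT,
               l_array, comp * l_size, MPI_FLOAT,
               MPI_ROOT, MPI_COMM_WORLD);
  free(sendcnts);
  free(displs);
}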
Example No. 2
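The same grid read and scatter as Example No. 1, but instead of running the solver this variant declares the OP2 data and constants and dumps the entire problem to an HDF5 file, new_grid_out.h5, finishing with a halo-creation sanity check.
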
int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  //MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  int    *becell, *ecell,  *bound, *bedge, *edge, *cell;
  float  *x, *q, *qold, *adt, *res;

  int    nnode,ncell,nedge,nbedge;

  /**------------------------BEGIN I/O -------------------**/

  char file[] = "new_grid.dat";
  char file_out[] = "new_grid_out.h5";

  /* read in grid from disk on root processor */
  FILE *fp;

  if ( (fp = fopen(file,"r")) == NULL) {
    op_printf("can't open file %s\n",file); exit(-1);
  }

  int   g_nnode,g_ncell,g_nedge,g_nbedge;

  check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  float *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants

  op_printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  float mach  = 0.4f;
  float alpha = 3.0f*atan(1.0f)/45.0f;
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(gam*p/r)*mach;
  float e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n"
      ,g_nnode,g_ncell,g_nedge,g_nbedge);

  if(my_rank == MPI_ROOT) {
    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_edge   = (int *) malloc(2*g_nedge*sizeof(int));
    g_ecell  = (int *) malloc(2*g_nedge*sizeof(int));
    g_bedge  = (int *) malloc(2*g_nbedge*sizeof(int));
    g_becell = (int *) malloc(  g_nbedge*sizeof(int));
    g_bound  = (int *) malloc(  g_nbedge*sizeof(int));

    g_x      = (float *) malloc(2*g_nnode*sizeof(float));
    g_q      = (float *) malloc(4*g_ncell*sizeof(float));
    g_qold   = (float *) malloc(4*g_ncell*sizeof(float));
    g_res    = (float *) malloc(4*g_ncell*sizeof(float));
    g_adt    = (float *) malloc(  g_ncell*sizeof(float));

    for (int n=0; n<g_nnode; n++){
      check_scan(fscanf(fp,"%f %f \n",&g_x[2*n], &g_x[2*n+1]), 2);
    }

    for (int n=0; n<g_ncell; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
            &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    for (int n=0; n<g_nedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1],
            &g_ecell[2*n],&g_ecell[2*n+1]), 4);
    }

    for (int n=0; n<g_nbedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1],
            &g_becell[n],&g_bound[n]), 4);
    }

    //initialise flow field and residual

    for (int n=0; n<g_ncell; n++) {
      for (int m=0; m<4; m++) {
        g_q[4*n+m] = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  ncell = compute_local_size (g_ncell, comm_size, my_rank);
  nedge = compute_local_size (g_nedge, comm_size, my_rank);
  nbedge = compute_local_size (g_nbedge, comm_size, my_rank);

  op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n"
      ,my_rank,nnode,ncell,nedge,nbedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x      = (float *) malloc(2*nnode*sizeof(float));
  q      = (float *) malloc(4*ncell*sizeof(float));
  qold   = (float *) malloc(4*ncell*sizeof(float));
  res    = (float *) malloc(4*ncell*sizeof(float));
  adt    = (float *) malloc(  ncell*sizeof(float));

  /* scatter sets, mappings and data on sets*/
  scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
  scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2);
  scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1);
  scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1);

  scatter_float_array(g_x, x, comm_size, g_nnode,nnode, 2);
  scatter_float_array(g_q, q, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_qold, qold, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_res, res, comm_size, g_ncell,ncell, 4);
  scatter_float_array(g_adt, adt, comm_size, g_ncell,ncell, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if(my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x );
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  /**------------------------END I/O  -----------------------**/

  /* FIXME: It is not clear to the compiler that something is going on
     behind the scenes here, so these variables are reported as unused. */

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes,2,edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells,2,ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes,2,bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes,4,cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"  ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"float",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"float",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"float",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"float",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"float",res  ,"p_res");

  op_decl_const(1,"float",&gam  );
  op_decl_const(1,"float",&gm1  );
  op_decl_const(1,"float",&cfl  );
  op_decl_const(1,"float",&eps  );
  op_decl_const(1,"float",&mach );
  op_decl_const(1,"float",&alpha);
  op_decl_const(4,"float",qinf  );

  op_dump_to_hdf5(file_out);
  op_write_const_hdf5("gam",  1,"float",(char *)&gam,  "new_grid_out.h5");
  op_write_const_hdf5("gm1",  1,"float",(char *)&gm1,  "new_grid_out.h5");
  op_write_const_hdf5("cfl",  1,"float",(char *)&cfl,  "new_grid_out.h5");
  op_write_const_hdf5("eps",  1,"float",(char *)&eps,  "new_grid_out.h5");
  op_write_const_hdf5("mach", 1,"float",(char *)&mach, "new_grid_out.h5");
  op_write_const_hdf5("alpha",1,"float",(char *)&alpha,"new_grid_out.h5");
  op_write_const_hdf5("qinf", 4,"float",(char *)qinf,  "new_grid_out.h5");

  //create halos - for sanity check
  op_halo_create();

  op_exit();
}
Example No. 3
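An OP2 MPI Jacobi solver on the (NN-1) x (NN-1) interior nodes of a unit square: the root rank assembles the matrix and right-hand side in memory, the arrays are scattered, NITER sweeps are run through op_par_loop, and the gathered solution is checked against TOLERANCE.
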
int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 2);

  // MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  int *pp;
  float *A, *r, *u, *du;

  int nnode, nedge;

  /**------------------------BEGIN I/O and PARTITIONING ---------------------**/

  int g_nnode, g_nedge, g_n, g_e;

  g_nnode = (NN - 1) * (NN - 1);
  g_nedge = (NN - 1) * (NN - 1) + 4 * (NN - 1) * (NN - 2);
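  // (NN-1)^2 interior nodes; g_nedge counts one diagonal (self) edge per
  // node plus 4*(NN-1)*(NN-2) directed edges between horizontally and
  // vertically adjacent interior node pairs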

  int *g_pp = 0;
  float *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0;

  op_printf("Global number of nodes, edges = %d, %d\n", g_nnode, g_nedge);

  if (my_rank == MPI_ROOT) {
    g_pp = (int *)malloc(sizeof(int) * 2 * g_nedge);

    g_A = (float *)malloc(sizeof(float) * g_nedge);
    g_r = (float *)malloc(sizeof(float) * g_nnode);
    g_u = (float *)malloc(sizeof(float) * g_nnode);
    g_du = (float *)malloc(sizeof(float) * g_nnode);

    // create matrix and r.h.s., and set coordinates needed for renumbering /
    // partitioning

    g_e = 0;

    for (int i = 1; i < NN; i++) {
      for (int j = 1; j < NN; j++) {
        g_n = i - 1 + (j - 1) * (NN - 1);
        g_r[g_n] = 0.0f;
        g_u[g_n] = 0.0f;
        g_du[g_n] = 0.0f;

        g_pp[2 * g_e] = g_n;
        g_pp[2 * g_e + 1] = g_n;
        g_A[g_e] = -1.0f;
        g_e++;

        for (int pass = 0; pass < 4; pass++) {
          int i2 = i;
          int j2 = j;
          if (pass == 0)
            i2 += -1;
          if (pass == 1)
            i2 += 1;
          if (pass == 2)
            j2 += -1;
          if (pass == 3)
            j2 += 1;

          if ((i2 == 0) || (i2 == NN) || (j2 == 0) || (j2 == NN)) {
            g_r[g_n] += 0.25f;
          } else {
            g_pp[2 * g_e] = g_n;
            g_pp[2 * g_e + 1] = i2 - 1 + (j2 - 1) * (NN - 1);
            g_A[g_e] = 0.25f;
            g_e++;
          }
        }
      }
    }
  }

  /* Compute local sizes */
  nnode = compute_local_size(g_nnode, comm_size, my_rank);
  nedge = compute_local_size(g_nedge, comm_size, my_rank);
  op_printf("Number of nodes, edges on process %d = %d, %d\n", my_rank, nnode,
            nedge);

  /*Allocate memory to hold local sets, mapping tables and data*/
  pp = (int *)malloc(2 * sizeof(int) * nedge);

  A = (float *)malloc(nedge * sizeof(float));
  r = (float *)malloc(nnode * sizeof(float));
  u = (float *)malloc(nnode * sizeof(float));
  du = (float *)malloc(nnode * sizeof(float));

  /* scatter sets, mappings and data on sets*/
  scatter_int_array(g_pp, pp, comm_size, g_nedge, nedge, 2);
  scatter_float_array(g_A, A, comm_size, g_nedge, nedge, 1);
  scatter_float_array(g_r, r, comm_size, g_nnode, nnode, 1);
  scatter_float_array(g_u, u, comm_size, g_nnode, nnode, 1);
  scatter_float_array(g_du, du, comm_size, g_nnode, nnode, 1);

  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if (my_rank == MPI_ROOT) {
    free(g_pp);
    free(g_A);
    free(g_r);
    free(g_u);
    free(g_du);
  }

  /**------------------------END I/O and PARTITIONING ---------------------**/

  // declare sets, pointers, and datasets

  op_set nodes = op_decl_set(nnode, "nodes");
  op_set edges = op_decl_set(nedge, "edges");

  op_map ppedge = op_decl_map(edges, nodes, 2, pp, "ppedge");

  op_dat p_A = op_decl_dat(edges, 1, "float", A, "p_A");
  op_dat p_r = op_decl_dat(nodes, 1, "float", r, "p_r");
  op_dat p_u = op_decl_dat(nodes, 1, "float", u, "p_u");
  op_dat p_du = op_decl_dat(nodes, 1, "float", du, "p_du");

  alpha = 1.0f;
  op_decl_const(1, "float", &alpha);

  op_diagnostic_output();

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL);

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main iteration loop

  float u_sum, u_max, beta = 1.0f;

  for (int iter = 0; iter < NITER; iter++) {
    op_par_loop(res, "res", edges,
                op_arg_dat(p_A, -1, OP_ID, 1, "float", OP_READ),
                op_arg_dat(p_u, 1, ppedge, 1, "float", OP_READ),
                op_arg_dat(p_du, 0, ppedge, 1, "float", OP_INC),
                op_arg_gbl(&beta, 1, "float", OP_READ));

    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop(update, "update", nodes,
                op_arg_dat(p_r, -1, OP_ID, 1, "float", OP_READ),
                op_arg_dat(p_du, -1, OP_ID, 1, "float", OP_RW),
                op_arg_dat(p_u, -1, OP_ID, 1, "float", OP_INC),
                op_arg_gbl(&u_sum, 1, "float", OP_INC),
                op_arg_gbl(&u_max, 1, "float", OP_MAX));

    op_printf("\n u max/rms = %f %f \n\n", u_max, sqrt(u_sum / g_nnode));
  }

  op_timers(&cpu_t2, &wall_t2);

  // get results data array
  op_fetch_data(p_u, u);

  // output the result dat array to files
  op_print_dat_to_txtfile(p_u, "out_grid_mpi.dat"); // ASCII
  op_print_dat_to_binfile(p_u, "out_grid_mpi.bin"); // Binary

  printf("solution on rank %d\n", my_rank);
  for (int i = 0; i < nnode; i++) {
    printf(" %7.4f", u[i]);
    fflush(stdout);
  }
  printf("\n");

  // print each mpi process's timing info for each kernel
  op_timing_output();

  // print total time for niter iterations
  op_printf("Max total runtime = %f\n", wall_t2 - wall_t1);

  // gather results from all ranks and check
  float *ug = (float *)malloc(sizeof(float) * op_get_size(nodes));
  op_fetch_data_idx(p_u, ug, 0, op_get_size(nodes) - 1);
  int result = check_result<float>(ug, NN, TOLERANCE);
  free(ug);

  op_exit();

  free(u);
  free(pp);
  free(A);
  free(r);
  free(du);

  return result;
}
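
The res and update kernels called in Examples No. 3 and No. 4 are likewise defined outside this listing. A sketch of what one Jacobi sweep looks like in the standard OP2 jac application, assuming alpha is the global constant declared above:

// Sketch (assumption): per edge, accumulate one off-diagonal contribution
// into du at the edge's first node, reading u at its second node.
void res(const float *A, const float *u, float *du, const float *beta) {
  *du += (*beta) * (*A) * (*u);
}

// Sketch (assumption): per node, apply the accumulated update, reset du,
// and reduce the sum of squares and the maximum of u for the printout.
void update(const float *r, float *du, float *u, float *u_sum, float *u_max) {
  *u += *du + alpha * (*r);
  *du = 0.0f;
  *u_sum += (*u) * (*u);
  *u_max = (*u_max > *u) ? *u_max : *u;
}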
Example No. 4
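The same Jacobi solver as Example No. 3, written against an older OP2 MPI interface: MPI is initialised by hand before op_init, halos are created explicitly with op_halo_create, and after the iteration loop the data are restored to their original order with op_partition_reverse before MPI_Finalize.
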
int main(int argc, char **argv){

  int my_rank;
  int comm_size;
  
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
  
  //timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;                                        
  double time;
  double max_time;
  
  int *pp;
  float *A, *r, *u, *du;
  
  int   nnode, nedge, n, e;
  float dx;

  /**------------------------BEGIN I/O and PARTITIONING ---------------------**/
  
  int g_nnode, g_nedge, g_n, g_e;
  float g_dx;   // grid spacing; must be float, not int, or 1.0f/NN truncates to 0

  g_nnode = (NN-1)*(NN-1);
  g_nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2);
  g_dx    = 1.0f / ((float) NN);

  int *g_pp;
  float *g_A, *g_r, *g_u, *g_du;
  
  if(my_rank == MPI_ROOT) {
    printf("Global number of nodes, edges = %d, %d\n",g_nnode,g_nedge);

    g_pp = (int *)malloc(sizeof(int)*2*g_nedge);

    g_A  = (float *)malloc(sizeof(float)*g_nedge);
    g_r  = (float *)malloc(sizeof(float)*g_nnode);
    g_u  = (float *)malloc(sizeof(float)*g_nnode);
    g_du = (float *)malloc(sizeof(float)*g_nnode);

    // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning

    g_e = 0;

    for (int i=1; i<NN; i++) {
      for (int j=1; j<NN; j++) {
        g_n           = i-1 + (j-1)*(NN-1);
        g_r[g_n]      = 0.0f;
        g_u[g_n]      = 0.0f;
        g_du[g_n]     = 0.0f;

        g_pp[2*g_e]   = g_n;
        g_pp[2*g_e+1] = g_n;
        g_A[g_e]      = -1.0f;
        g_e++;

        for (int pass=0; pass<4; pass++) {
          int i2 = i;
          int j2 = j;
          if (pass==0) i2 += -1;
          if (pass==1) i2 +=  1;
          if (pass==2) j2 += -1;
          if (pass==3) j2 +=  1;

          if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) {
            g_r[g_n] += 0.25f;
          }
          else {
            g_pp[2*g_e]   = g_n;
            g_pp[2*g_e+1] = i2-1 + (j2-1)*(NN-1);
            g_A[g_e]      = 0.25f;
            g_e++;
          }
        }
      }
    }
  }
  
  /* Compute local sizes */ 
  nnode = compute_local_size (g_nnode, comm_size, my_rank);
  nedge = compute_local_size (g_nedge, comm_size, my_rank);
  printf("Number of nodes, edges on process %d = %d, %d\n"
  	  ,my_rank,nnode,nedge);
  
  /*Allocate memory to hold local sets, mapping tables and data*/
  pp = (int *)malloc(2*sizeof(int)*nedge);
  
  A      = (float *) malloc(nedge*sizeof(float));
  r      = (float *) malloc(nnode*sizeof(float));
  u      = (float *) malloc(nnode*sizeof(float));
  du     = (float *) malloc(nnode*sizeof(float));
  
  /* scatter sets, mappings and data on sets*/
  scatter_int_array(g_pp, pp, comm_size, g_nedge,nedge, 2);
  scatter_float_array(g_A, A, comm_size, g_nedge,nedge, 1);
  scatter_float_array(g_r, r, comm_size, g_nnode,nnode, 1);
  scatter_float_array(g_u, u, comm_size, g_nnode,nnode, 1);
  scatter_float_array(g_du, du, comm_size, g_nnode,nnode, 1);
      
  /*Freeing memory allocated to global arrays on rank 0
    after scattering to all processes*/
  if(my_rank == MPI_ROOT) {
    free(g_pp);
    free(g_A);
    free(g_r);
    free(g_u);
    free(g_du);
  }
  
  /**------------------------END I/O and PARTITIONING ---------------------**/
  
  // OP initialisation

  op_init(argc,argv,2);

  // declare sets, pointers, and datasets

  op_set nodes = op_decl_set(nnode,"nodes");
  op_set edges = op_decl_set(nedge,"edges");

  op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge");

  op_dat p_A = op_decl_dat(edges,1,"float", A,  "p_A" );
  op_dat p_r = op_decl_dat(nodes,1,"float", r,  "p_r" );
  op_dat p_u = op_decl_dat(nodes,1,"float", u,  "p_u" );
  op_dat p_du = op_decl_dat(nodes,1,"float", du,"p_du");

  alpha = 1.0f;
  op_decl_const(1,"float",&alpha);

  op_diagnostic_output();
  
  //random partitioning for diagnostics purposes
  //op_partition_random(nodes);
  
  //create halos
  op_halo_create();
  
  //initialise timers for total execution wall time                                                         
  op_timers(&cpu_t1, &wall_t1); 
  

  // main iteration loop

  float u_sum, u_max, beta = 1.0f;

  for (int iter=0; iter<NITER; iter++) {
    op_par_loop(res,"res", edges,
        op_arg_dat(p_A,  -1,OP_ID,  1,"float", OP_READ),
        op_arg_dat(p_u,   1,ppedge, 1,"float", OP_READ),
        op_arg_dat(p_du,  0,ppedge, 1,"float", OP_INC),
        op_arg_gbl(&beta, 1,"float", OP_READ));
    
    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop(update,"update", nodes,
        op_arg_dat(p_r,   -1,OP_ID, 1,"float",OP_READ),
        op_arg_dat(p_du,  -1,OP_ID, 1,"float",OP_RW),
        op_arg_dat(p_u,   -1,OP_ID, 1,"float",OP_INC),
        op_arg_gbl(&u_sum,1,"float",OP_INC),
        op_arg_gbl(&u_max,1,"float",OP_MAX));
    
    if(my_rank == MPI_ROOT)
      printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/g_nnode));
  }
  
  op_timers(&cpu_t2, &wall_t2);
  
  //get results data array
  op_dat temp = op_mpi_get_data(p_u);
  
  //output the result dat array to files
  print_dat_tofile(temp, "out_grid.dat"); //ASCII
  print_dat_tobinfile(temp, "out_grid.bin"); //Binary
  
  //free memory allocated to halos
  op_halo_destroy();
  //return all op_dats, op_maps back to original element order
  op_partition_reverse(); 
    
  //print each mpi process's timing info for each kernel
  op_mpi_timing_output();
  //print total time for niter iterations
  time = wall_t2-wall_t1;
  MPI_Reduce(&time,&max_time,1,MPI_DOUBLE, MPI_MAX,MPI_ROOT, MPI_COMM_WORLD);
  if(my_rank == MPI_ROOT) printf("Max total runtime = %f\n",max_time);
  
  MPI_Finalize();   //user mpi finalize
}
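
Compared with Example No. 3, this older variant manages the machinery explicitly: MPI_Init/MPI_Finalize bracket the run, halos are built and torn down with op_halo_create/op_halo_destroy, op_partition_reverse restores the original element order before output, and the maximum runtime is reduced across ranks by hand, whereas Example No. 3 relies on op_partition, op_exit and op_printf to do the same work.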