예제 #1
0
/*
 * Entry point of the HDF5-input variant of this solver example.
 *
 * Sets the solver constants, declares the sets, maps and datasets found in
 * "FE_grid.h5", partitions the mesh and then runs niter outer iterations.
 * Each outer iteration evaluates the residual and the per-cell p_K data
 * (res_calc), applies the boundary loop (dirichlet), runs an inner
 * conjugate-gradient loop and finally applies the update loop to p_phim
 * while accumulating rms.
 *
 * gm1, gm1i, m2, wtg1, xi1, Ng1, Ng1_xi, wtg2, Ng2, Ng2_xi, minf, freq,
 * kappa, nmode and mfan are not declared in this function; they have to be
 * defined at file scope elsewhere in the program.
 */
int main(int argc, char **argv)
{
  // OP initialisation

  op_init(argc,argv,2);

  int    nnode,ncell,nbnodes,niter;
  double  rms = 1;

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  double gam = 1.4;
  gm1 = gam - 1.0;
  gm1i = 1.0/gm1;

  // tabulated constants read by the kernels; registered with OP2 further down
  wtg1[0] = 0.5;
  wtg1[1] = 0.5;
  xi1[0] = 0.211324865405187;
  xi1[1] = 0.788675134594813;
  Ng1[0] = 0.788675134594813;
  Ng1[1] = 0.211324865405187;
  Ng1[2] = 0.211324865405187;
  Ng1[3] = 0.788675134594813;
  Ng1_xi[0] = -1;
  Ng1_xi[1] = -1;
  Ng1_xi[2] = 1;
  Ng1_xi[3] = 1;
  wtg2[0] = 0.25;
  wtg2[1] = 0.25;
  wtg2[2] = 0.25;
  wtg2[3] = 0.25;
  Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520;
  Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667;
  Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667;
  Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146;
  Ng2_xi[0] = -0.788675134594813;  Ng2_xi[1] = 0.788675134594813;  Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187;
  Ng2_xi[4] = -0.788675134594813;  Ng2_xi[5] = 0.788675134594813;  Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187;
  Ng2_xi[8] = -0.211324865405187;  Ng2_xi[9] = 0.211324865405187;  Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813;
  Ng2_xi[12] = -0.211324865405187;  Ng2_xi[13] = 0.211324865405187;  Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813;
  Ng2_xi[16] = -0.788675134594813;  Ng2_xi[17] = -0.211324865405187;  Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187;
  Ng2_xi[20] = -0.211324865405187;  Ng2_xi[21] = -0.788675134594813;  Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813;
  Ng2_xi[24] = -0.788675134594813;  Ng2_xi[25] = -0.211324865405187;  Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187;
  Ng2_xi[28] = -0.211324865405187;  Ng2_xi[29] = -0.788675134594813;  Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813;

  minf = 0.1;
  m2 = minf*minf;
  freq = 1;
  kappa = 1;
  nmode = 0;

  mfan = 1.0;

  char file[] = "FE_grid.h5";


  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set_hdf5(file,  "nodes");
  op_set bnodes = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file,  "cells");

  op_map pbnodes = op_decl_map_hdf5(bnodes,nodes,1,file, "pbedge");
  op_map pcell   = op_decl_map_hdf5(cells, nodes,4,file,  "pcell");

  op_dat p_xm    = op_decl_dat_hdf5(nodes ,2,"double",  file, "p_x");
  op_dat p_phim  = op_decl_dat_hdf5(nodes, 1, "double", file, "p_phim");
  op_dat p_resm  = op_decl_dat_hdf5(nodes, 1, "double", file, "p_resm");
  op_dat p_K     = op_decl_dat_hdf5(cells, 16, "double:soa",file, "p_K");
  op_dat p_V     = op_decl_dat_hdf5(nodes, 1, "double", file, "p_V");
  op_dat p_P     = op_decl_dat_hdf5(nodes, 1, "double", file, "p_P");
  op_dat p_U     = op_decl_dat_hdf5(nodes, 1, "double", file, "p_U");

  op_decl_const2("gam",1,"double",&gam  );
  op_decl_const2("gm1",1,"double",&gm1  );
  op_decl_const2("gm1i",1,"double",&gm1i  );
  op_decl_const2("m2",1,"double",&m2  );
  op_decl_const2("wtg1",2,"double",wtg1  );
  op_decl_const2("xi1",2,"double",xi1  );
  op_decl_const2("Ng1",4,"double",Ng1  );
  op_decl_const2("Ng1_xi",4,"double",Ng1_xi  );
  op_decl_const2("wtg2",4,"double",wtg2  );
  op_decl_const2("Ng2",16,"double",Ng2  );
  op_decl_const2("Ng2_xi",32,"double",Ng2_xi  );
  op_decl_const2("minf",1,"double",&minf  );
  op_decl_const2("freq",1,"double",&freq  );
  op_decl_const2("kappa",1,"double",&kappa  );
  op_decl_const2("nmode",1,"double",&nmode  );
  op_decl_const2("mfan",1,"double",&mfan  );

  op_diagnostic_output();

  op_partition("PTSCOTCH", "KWAY", cells, pcell, p_xm);

  op_printf("nodes: %d cells: %d bnodes: %d\n", nodes->size, cells->size, bnodes->size);
  // set sizes as reported by op_get_size; only nnode is used below,
  // to normalise the printed rms
  nnode = op_get_size(nodes);
  ncell = op_get_size(cells);
  nbnodes = op_get_size(bnodes);

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop

  niter = 20;

  for(int iter=1; iter<=niter; iter++) {

    op_par_loop_res_calc("res_calc",cells,
               op_arg_dat(p_xm,-4,pcell,2,"double",OP_READ),
               op_arg_dat(p_phim,-4,pcell,1,"double",OP_READ),
               op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_WRITE),
               op_arg_dat(p_resm,-4,pcell,1,"double",OP_INC));

    op_par_loop_dirichlet("dirichlet",bnodes,
               op_arg_dat(p_resm,0,pbnodes,1,"double",OP_WRITE));

    double c1 = 0;
    double c2 = 0;
    double c3 = 0;
    double alpha = 0;
    double beta = 0;

    //c1 = R'*R;
    op_par_loop_init_cg("init_cg",nodes,
               op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ),
               op_arg_gbl(&c1,1,"double",OP_INC),
               op_arg_dat(p_U,-1,OP_ID,1,"double",OP_WRITE),
               op_arg_dat(p_V,-1,OP_ID,1,"double",OP_WRITE),
               op_arg_dat(p_P,-1,OP_ID,1,"double",OP_WRITE));

    //set up stopping conditions: leave the inner loop once the residual
    //norm has dropped to 10% of its starting value, or after maxiter passes
    double res0 = sqrt(c1);
    double res = res0;
    // separate counter so the outer loop variable `iter` is not shadowed
    int inner_iter = 0;
    int maxiter = 200;
    while (res > 0.1*res0 && inner_iter < maxiter) {
      //V = Stiffness*P
      op_par_loop_spMV("spMV",cells,
                 op_arg_dat(p_V,-4,pcell,1,"double",OP_INC),
                 op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_READ),
                 op_arg_dat(p_P,-4,pcell,1,"double",OP_READ));

      op_par_loop_dirichlet("dirichlet",bnodes,
                 op_arg_dat(p_V,0,pbnodes,1,"double",OP_WRITE));

      c2 = 0;

      //c2 = P'*V;
      op_par_loop_dotPV("dotPV",nodes,
                 op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ),
                 op_arg_dat(p_V,-1,OP_ID,1,"double",OP_READ),
                 op_arg_gbl(&c2,1,"double",OP_INC));

      alpha = c1/c2;

      //U = U + alpha*P;
      //resm = resm-alpha*V;
      op_par_loop_updateUR("updateUR",nodes,
                 op_arg_dat(p_U,-1,OP_ID,1,"double",OP_INC),
                 op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_INC),
                 op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ),
                 op_arg_dat(p_V,-1,OP_ID,1,"double",OP_RW),
                 op_arg_gbl(&alpha,1,"double",OP_READ));

      c3 = 0;

      //c3 = resm'*resm;
      op_par_loop_dotR("dotR",nodes,
                 op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ),
                 op_arg_gbl(&c3,1,"double",OP_INC));
      beta = c3/c1;
      //P = beta*P+resm;
      op_par_loop_updateP("updateP",nodes,
                 op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ),
                 op_arg_dat(p_P,-1,OP_ID,1,"double",OP_RW),
                 op_arg_gbl(&beta,1,"double",OP_READ));
      c1 = c3;
      res = sqrt(c1);
      inner_iter++;
    }
    rms = 0;
    //phim = phim - Stiffness\Load;
    op_par_loop_update("update",nodes,
               op_arg_dat(p_phim,-1,OP_ID,1,"double",OP_RW),
               op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_WRITE),
               op_arg_dat(p_U,-1,OP_ID,1,"double",OP_READ),
               op_arg_gbl(&rms,1,"double",OP_INC));
    // the count printed is the number of inner iterations just performed
    op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(nnode), inner_iter);
  }

  // sample the stop time before producing the timing report so that the
  // report itself is not counted in the printed runtime
  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = %f\n",wall_t2-wall_t1);
  op_exit();
}
예제 #2
0
int main(int argc, char **argv) {
  // OP initialisation

  op_init(argc, argv, 2);

  // MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  int *bnode, *cell, *g_bnode, *g_cell;
  double *xm, *g_xm;
  ;

  int nnode, ncell, nbnodes, niter, g_nnode, g_ncell, g_nbnodes;
  double rms = 1;

  // read in grid

  op_printf("reading in grid \n");

  FILE *fp;
  if ((fp = fopen("FE_grid.dat", "r")) == NULL) {
    op_printf("can't open file FE_grid.dat\n");
    exit(-1);
  }

  if (fscanf(fp, "%d %d %d \n", &g_nnode, &g_ncell, &g_nbnodes) != 3) {
    op_printf("error reading from new_grid.dat\n");
    exit(-1);
  }

  if (my_rank == MPI_ROOT) {
    g_cell = (int *)malloc(4 * g_ncell * sizeof(int));
    g_bnode = (int *)malloc(g_nbnodes * sizeof(int));
    g_xm = (double *)malloc(2 * g_nnode * sizeof(double));

    for (int n = 0; n < g_nnode; n++) {
      if (fscanf(fp, "%lf %lf \n", &g_xm[2 * n], &g_xm[2 * n + 1]) != 2) {
        op_printf("error reading from new_grid.dat\n");
        exit(-1);
      }
    }

    for (int n = 0; n < g_ncell; n++) {
      if (fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n], &g_cell[4 * n + 1],
                 &g_cell[4 * n + 2], &g_cell[4 * n + 3]) != 4) {
        op_printf("error reading from new_grid.dat\n");
        exit(-1);
      }
    }

    for (int n = 0; n < g_nbnodes; n++) {
      if (fscanf(fp, "%d \n", &g_bnode[n]) != 1) {
        op_printf("error reading from new_grid.dat\n");
        exit(-1);
      }
    }
  }
  fclose(fp);

  nnode = compute_local_size(g_nnode, comm_size, my_rank);
  ncell = compute_local_size(g_ncell, comm_size, my_rank);
  nbnodes = compute_local_size(g_nbnodes, comm_size, my_rank);

  cell = (int *)malloc(4 * ncell * sizeof(int));
  bnode = (int *)malloc(nbnodes * sizeof(int));
  xm = (double *)malloc(2 * nnode * sizeof(double));

  scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4);
  scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes, nbnodes, 1);
  scatter_double_array(g_xm, xm, comm_size, g_nnode, nnode, 2);

  if (my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_xm);
    free(g_bnode);
  }

  // set constants and initialise flow field and residual

  op_printf("initialising flow field \n");

  double gam = 1.4;
  gm1 = gam - 1.0;
  gm1i = 1.0 / gm1;

  wtg1[0] = 0.5;
  wtg1[1] = 0.5;
  xi1[0] = 0.211324865405187;
  xi1[1] = 0.788675134594813;
  Ng1[0] = 0.788675134594813;
  Ng1[1] = 0.211324865405187;
  Ng1[2] = 0.211324865405187;
  Ng1[3] = 0.788675134594813;
  Ng1_xi[0] = -1;
  Ng1_xi[1] = -1;
  Ng1_xi[2] = 1;
  Ng1_xi[3] = 1;
  wtg2[0] = 0.25;
  wtg2[1] = 0.25;
  wtg2[2] = 0.25;
  wtg2[3] = 0.25;
  Ng2[0] = 0.622008467928146;
  Ng2[1] = 0.166666666666667;
  Ng2[2] = 0.166666666666667;
  Ng2[3] = 0.044658198738520;
  Ng2[4] = 0.166666666666667;
  Ng2[5] = 0.622008467928146;
  Ng2[6] = 0.044658198738520;
  Ng2[7] = 0.166666666666667;
  Ng2[8] = 0.166666666666667;
  Ng2[9] = 0.044658198738520;
  Ng2[10] = 0.622008467928146;
  Ng2[11] = 0.166666666666667;
  Ng2[12] = 0.044658198738520;
  Ng2[13] = 0.166666666666667;
  Ng2[14] = 0.166666666666667;
  Ng2[15] = 0.622008467928146;
  Ng2_xi[0] = -0.788675134594813;
  Ng2_xi[1] = 0.788675134594813;
  Ng2_xi[2] = -0.211324865405187;
  Ng2_xi[3] = 0.211324865405187;
  Ng2_xi[4] = -0.788675134594813;
  Ng2_xi[5] = 0.788675134594813;
  Ng2_xi[6] = -0.211324865405187;
  Ng2_xi[7] = 0.211324865405187;
  Ng2_xi[8] = -0.211324865405187;
  Ng2_xi[9] = 0.211324865405187;
  Ng2_xi[10] = -0.788675134594813;
  Ng2_xi[11] = 0.788675134594813;
  Ng2_xi[12] = -0.211324865405187;
  Ng2_xi[13] = 0.211324865405187;
  Ng2_xi[14] = -0.788675134594813;
  Ng2_xi[15] = 0.788675134594813;
  Ng2_xi[16] = -0.788675134594813;
  Ng2_xi[17] = -0.211324865405187;
  Ng2_xi[18] = 0.788675134594813;
  Ng2_xi[19] = 0.211324865405187;
  Ng2_xi[20] = -0.211324865405187;
  Ng2_xi[21] = -0.788675134594813;
  Ng2_xi[22] = 0.211324865405187;
  Ng2_xi[23] = 0.788675134594813;
  Ng2_xi[24] = -0.788675134594813;
  Ng2_xi[25] = -0.211324865405187;
  Ng2_xi[26] = 0.788675134594813;
  Ng2_xi[27] = 0.211324865405187;
  Ng2_xi[28] = -0.211324865405187;
  Ng2_xi[29] = -0.788675134594813;
  Ng2_xi[30] = 0.211324865405187;
  Ng2_xi[31] = 0.788675134594813;

  minf = 0.1;
  m2 = minf * minf;
  freq = 1;
  kappa = 1;
  nmode = 0;

  mfan = 1.0;

  double *phim = (double *)malloc(nnode * sizeof(double));
  memset(phim, 0, nnode * sizeof(double));
  for (int i = 0; i < nnode; i++) {
    phim[i] = minf * xm[2 * i];
  }

  double *K = (double *)malloc(4 * 4 * ncell * sizeof(double));
  memset(K, 0, 4 * 4 * ncell * sizeof(double));
  double *resm = (double *)malloc(nnode * sizeof(double));
  memset(resm, 0, nnode * sizeof(double));

  double *V = (double *)malloc(nnode * sizeof(double));
  memset(V, 0, nnode * sizeof(double));
  double *P = (double *)malloc(nnode * sizeof(double));
  memset(P, 0, nnode * sizeof(double));
  double *U = (double *)malloc(nnode * sizeof(double));
  memset(U, 0, nnode * sizeof(double));

  // declare sets, pointers, datasets and global constants

  op_set nodes = op_decl_set(nnode, "nodes");
  op_set bnodes = op_decl_set(nbnodes, "bedges");
  op_set cells = op_decl_set(ncell, "cells");

  op_map pbnodes = op_decl_map(bnodes, nodes, 1, bnode, "pbedge");
  op_map pcell = op_decl_map(cells, nodes, 4, cell, "pcell");

  op_dat p_xm = op_decl_dat(nodes, 2, "double", xm, "p_x");
  op_dat p_phim = op_decl_dat(nodes, 1, "double", phim, "p_phim");
  op_dat p_resm = op_decl_dat(nodes, 1, "double", resm, "p_resm");
  op_dat p_K = op_decl_dat(cells, 16, "double:soa", K, "p_K");

  op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V");
  op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P");
  op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U");

  op_decl_const2("gam", 1, "double", &gam);
  op_decl_const2("gm1", 1, "double", &gm1);
  op_decl_const2("gm1i", 1, "double", &gm1i);
  op_decl_const2("m2", 1, "double", &m2);
  op_decl_const2("wtg1", 2, "double", wtg1);
  op_decl_const2("xi1", 2, "double", xi1);
  op_decl_const2("Ng1", 4, "double", Ng1);
  op_decl_const2("Ng1_xi", 4, "double", Ng1_xi);
  op_decl_const2("wtg2", 4, "double", wtg2);
  op_decl_const2("Ng2", 16, "double", Ng2);
  op_decl_const2("Ng2_xi", 32, "double", Ng2_xi);
  op_decl_const2("minf", 1, "double", &minf);
  op_decl_const2("freq", 1, "double", &freq);
  op_decl_const2("kappa", 1, "double", &kappa);
  op_decl_const2("nmode", 1, "double", &nmode);
  op_decl_const2("mfan", 1, "double", &mfan);

  op_diagnostic_output();

  op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL);

  // main time-marching loop

  niter = 20;
  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);
  for (int iter = 1; iter <= niter; iter++) {

    op_par_loop_res_calc("res_calc", cells,
                         op_arg_dat(p_xm, -4, pcell, 2, "double", OP_READ),
                         op_arg_dat(p_phim, -4, pcell, 1, "double", OP_READ),
                         op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_WRITE),
                         op_arg_dat(p_resm, -4, pcell, 1, "double", OP_INC));

    op_par_loop_dirichlet("dirichlet", bnodes, op_arg_dat(p_resm, 0, pbnodes, 1,
                                                          "double", OP_WRITE));

    double c1 = 0;
    double c2 = 0;
    double c3 = 0;
    double alpha = 0;
    double beta = 0;

    // c1 = R'*R;
    op_par_loop_init_cg("init_cg", nodes,
                        op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                        op_arg_gbl(&c1, 1, "double", OP_INC),
                        op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE),
                        op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE),
                        op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE));

    // set up stopping conditions
    double res0 = sqrt(c1);
    double res = res0;
    int inner_iter = 0;
    int maxiter = 200;
    while (res > 0.1 * res0 && inner_iter < maxiter) {
      // V = Stiffness*P
      op_par_loop_spMV("spMV", cells,
                       op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC),
                       op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ),
                       op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ));

      op_par_loop_dirichlet("dirichlet", bnodes,
                            op_arg_dat(p_V, 0, pbnodes, 1, "double", OP_WRITE));

      c2 = 0;

      // c2 = P'*V;
      op_par_loop_dotPV("dotPV", nodes,
                        op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ),
                        op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ),
                        op_arg_gbl(&c2, 1, "double", OP_INC));

      alpha = c1 / c2;

      // U = U + alpha*P;
      // resm = resm-alpha*V;
      op_par_loop_updateUR("updateUR", nodes,
                           op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC),
                           op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC),
                           op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ),
                           op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW),
                           op_arg_gbl(&alpha, 1, "double", OP_READ));

      c3 = 0;

      // c3 = resm'*resm;
      op_par_loop_dotR("dotR", nodes,
                       op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                       op_arg_gbl(&c3, 1, "double", OP_INC));
      beta = c3 / c1;
      // P = beta*P+resm;
      op_par_loop_updateP("updateP", nodes,
                          op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ),
                          op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW),
                          op_arg_gbl(&beta, 1, "double", OP_READ));
      c1 = c3;
      res = sqrt(c1);
      inner_iter++;
    }
    rms = 0;
    // phim = phim - Stiffness\Load;
    op_par_loop_update("update", nodes,
                       op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW),
                       op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE),
                       op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ),
                       op_arg_gbl(&rms, 1, "double", OP_INC));
    op_printf("rms = %10.5e iter: %d\n", sqrt(rms) / sqrt(g_nnode), inner_iter);
  }
  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = %f\n", wall_t2 - wall_t1);
  op_exit();

  /*free(cell);
  free(bnode);
  free(xm);
  free(phim);
  free(K);
  free(resm);
  free(V);
  free(P);
  free(U);*/
}