Example #1
int main( int argc, char *argv[] )
{
    int errs = 0;
    int *ranks;
    int *ranksout;
    MPI_Group gworld, grev, gself;
    MPI_Comm  comm;
    MPI_Comm  commrev;
    int rank, size, i;
    double start, end, time1, time2;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

    ranks    = malloc(size*sizeof(int));
    ranksout = malloc(size*sizeof(int));
    if (!ranks || !ranksout) {
        fprintf(stderr, "out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* generate a comm with the rank order reversed */
    MPI_Comm_split(comm, 0, (size-rank-1), &commrev);
    MPI_Comm_group(commrev, &grev);
    MPI_Comm_group(MPI_COMM_SELF, &gself);
    MPI_Comm_group(comm, &gworld);

    /* sanity check correctness first */
    for (i=0; i < size; i++) {
        ranks[i] = i;
        ranksout[i] = -1;
    }
    MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
    for (i=0; i < size; i++) {
        if (ranksout[i] != (size-i-1)) {
            if (rank == 0)
                printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i, (size-rank-1), ranksout[i]);
            ++errs;
        }
    }
    MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
    for (i=0; i < size; i++) {
        int expected = (i == (size-rank-1) ? 0 : MPI_UNDEFINED);
        if (ranksout[i] != expected) {
            if (rank == 0)
                printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected, ranksout[i]);
            ++errs;
        }
    }

    /* now compare relative performance */

    /* we need lots of procs to get a group large enough to have meaningful
     * numbers.  On most testing machines this means that we're oversubscribing
     * cores in a big way, which might perturb the timing results.  So we make
     * sure everyone started up and then everyone but rank 0 goes to sleep to
     * let rank 0 do all the timings. */
    MPI_Barrier(comm);

    if (rank != 0) {
        sleep(10);
    }
    else /* rank==0 */ {
        sleep(1); /* try to avoid timing while everyone else is making syscalls */

        MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /*throwaway iter*/
        start = MPI_Wtime();
        for (i = 0; i < NUM_LOOPS; ++i) {
            MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
        }
        end = MPI_Wtime();
        time1 = end - start;

        MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /*throwaway iter*/
        start = MPI_Wtime();
        for (i = 0; i < NUM_LOOPS; ++i) {
            MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
        }
        end = MPI_Wtime();
        time2 = end - start;

        /* complain if the "gworld" time exceeds 2x the "gself" time */
        if (fabs(time1 - time2) > (2.00 * time2)) {
            printf("too much difference in MPI_Group_translate_ranks performance:\n");
            printf("time1=%f time2=%f\n", time1, time2);
            printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1-time2)/time2));
            if (time1 < time2) {
                printf("also, (time1<time2) is surprising...\n");
            }
            ++errs;
        }
    }

    free(ranks);
    free(ranksout);

    MPI_Group_free(&grev);
    MPI_Group_free(&gself);
    MPI_Group_free(&gworld);

    MPI_Comm_free(&commrev);

    MTest_Finalize(errs);
    MPI_Finalize();

    return 0;
}
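The test above exercises MPI_Group_translate_ranks against both a reversed group and the group of MPI_COMM_SELF; any rank with no counterpart in the target group is reported as MPI_UNDEFINED. A minimal standalone sketch of the same call (hypothetical, not part of the test suite) looks like this:

#include <mpi.h>
#include <stdio.h>

/* Sketch: map rank 0 of MPI_COMM_SELF's group onto MPI_COMM_WORLD's group.
 * Each process should see its own world rank come back. */
int main(int argc, char *argv[])
{
    MPI_Group gself, gworld;
    int in = 0, out = MPI_UNDEFINED, wrank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_group(MPI_COMM_SELF, &gself);
    MPI_Comm_group(MPI_COMM_WORLD, &gworld);

    MPI_Group_translate_ranks(gself, 1, &in, gworld, &out);
    printf("rank %d: gself rank 0 maps to world rank %d\n", wrank, out);

    MPI_Group_free(&gself);
    MPI_Group_free(&gworld);
    MPI_Finalize();
    return 0;
}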
Example #2
int main(int argc, char **argv)
{
    int np[2];
    ptrdiff_t n[4], ni[4], no[4];
    ptrdiff_t alloc_local_forw, alloc_local_back, alloc_local, howmany;
    ptrdiff_t local_ni[4], local_i_start[4];
    ptrdiff_t local_n[4], local_start[4];
    ptrdiff_t local_no[4], local_o_start[4];
    double err, *in;
    pfft_complex *out;
    pfft_plan plan_forw=NULL, plan_back=NULL;
    MPI_Comm comm_cart_2d;

    /* Set size of FFT and process mesh */
    ni[0] = ni[1] = ni[2] = ni[3] = 8;
    n[0] = 13;
    n[1] = 14;
    n[2] = 15;
    n[3] = 17;
    for(int t=0; t<4; t++)
        no[t] = ni[t];
    np[0] = 2;
    np[1] = 2;
    howmany = 1;

    /* Initialize MPI and PFFT */
    MPI_Init(&argc, &argv);
    pfft_init();

    /* Create two-dimensional process grid of size np[0] x np[1], if possible */
    if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ) {
        pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
        MPI_Finalize();
        return 1;
    }

    /* Get parameters of data distribution */
    alloc_local_forw = pfft_local_size_many_dft_r2c(4, n, ni, n, howmany,
                       PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
                       comm_cart_2d, PFFT_TRANSPOSED_OUT,
                       local_ni, local_i_start, local_n, local_start);

    alloc_local_back = pfft_local_size_many_dft_c2r(4, n, n, no, howmany,
                       PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
                       comm_cart_2d, PFFT_TRANSPOSED_IN,
                       local_n, local_start, local_no, local_o_start);

    /* Allocate enough memory for both trafos */
    alloc_local = (alloc_local_forw > alloc_local_back) ?
                  alloc_local_forw : alloc_local_back;
    in  = pfft_alloc_real(2 * alloc_local);
    out = pfft_alloc_complex(alloc_local);

    /* Plan parallel forward FFT */
    plan_forw = pfft_plan_many_dft_r2c(
                    4, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
                    in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT);

    /* Plan parallel backward FFT */
    plan_back = pfft_plan_many_dft_c2r(
                    4, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
                    out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT);

    /* Initialize input with random numbers */
    pfft_init_input_real(4, ni, local_ni, local_i_start,
                         in);

    /* execute parallel forward FFT */
    pfft_execute(plan_forw);

    /* execute parallel backward FFT */
    pfft_execute(plan_back);

    /* Scale data */
    for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3]; l++)
        in[l] /= (n[0]*n[1]*n[2]*n[3]);

    /* Print error of back transformed data */
    MPI_Barrier(MPI_COMM_WORLD);
    err = pfft_check_output_real(4, ni, local_ni, local_i_start, in, comm_cart_2d);
    pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]);
    pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);

    /* free mem and finalize */
    pfft_destroy_plan(plan_forw);
    pfft_destroy_plan(plan_back);
    MPI_Comm_free(&comm_cart_2d);
    pfft_free(in);
    pfft_free(out);
    MPI_Finalize();
    return 0;
}
Example #3
int main (int argc, char *argv[])
{
    int numtasks, namelen, rank, dest = 1, tag = 111, source = 0, size, i, j;
    double start_time=0, elapsed_time=0, acum;
    double *outmsg, *inmsg;
    char hostname[256];
    MPI_Status status,status2;
    MPI_Request send_request,recv_request;
    
    if (argc < 2)
    {
        printf("Usage: %s size [where size is the number of elements (double) to send]\n", argv[0]);
        return 0;
    }
    
    size = atoi(argv[1]);
    
    outmsg=(double*)malloc(sizeof(double)*size);
    if(outmsg==NULL)
    {
        printf("Unable to allocate memory\n");
        return 1;
    }
    inmsg=(double*)malloc(sizeof(double)*size);
    if(inmsg==NULL)
    {
        printf("Unable to allocate memory\n");
        return 1;
    }
    
    MPI_Init (&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);   // get number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);       // get current process id
    MPI_Get_processor_name(hostname, &namelen); // get CPU name
    
    //Initialize the msg buffer to the rank id.
    for (i = 0; i < size; i++)
        outmsg[i] = rank;
    
    //Define the source as the left neighbour
    if (rank == 0)  source=numtasks-1;
    else            source=rank-1;
    //Define the destination as the right neighbour
    if(rank==numtasks-1) dest=0;
    else                 dest=rank+1;
    
    start_time = MPI_Wtime();
    acum=0;
    for (i=0; i<numtasks; i++) {
        if (rank==0) printf("it: %2d - Rank %d (%s) sending data (%g) to rank %d\n",i,rank, hostname, inmsg[0], dest);
        MPI_Isend(outmsg, size, MPI_DOUBLE, dest, tag,MPI_COMM_WORLD,&send_request);
        MPI_Recv (inmsg, size, MPI_DOUBLE, source, tag, MPI_COMM_WORLD,&status);
        acum = acum + inmsg[0];
        if (rank==0) printf("it: %2d - Rank %d received data (%g) from rank %d (acum=%g)\n",i,rank,outmsg[0],source,acum);
        MPI_Wait(&send_request, &status2);
        //Copy the inmsg to outmsg for the next iteration.
        for (j = 0; j < size; j++) outmsg[j] = inmsg[j];
    }

    MPI_Barrier(MPI_COMM_WORLD);
    elapsed_time = MPI_Wtime() - start_time;
    printf(" Rank %d: Elapsed time to send %6d double(s) across a ring made up by %2d (acum=%g) in %g ms\n", rank, size, numtasks, acum, elapsed_time*1e03);
    
    MPI_Finalize ();

    return 0;
}
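The ring above avoids deadlock by posting a non-blocking MPI_Isend before the blocking MPI_Recv and calling MPI_Wait afterwards. The same shift can be expressed with MPI_Sendrecv, which pairs the send and receive internally; a minimal sketch (one double per message, hypothetical standalone program) is:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank, numtasks, source, dest, tag = 111;
    double outmsg, inmsg = 0.0;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

    /* receive from the left neighbour, send to the right neighbour */
    source = (rank + numtasks - 1) % numtasks;
    dest   = (rank + 1) % numtasks;
    outmsg = rank;

    /* combined send+receive: no request/wait bookkeeping needed */
    MPI_Sendrecv(&outmsg, 1, MPI_DOUBLE, dest, tag,
                 &inmsg,  1, MPI_DOUBLE, source, tag,
                 MPI_COMM_WORLD, &status);

    printf("rank %d received %g from rank %d\n", rank, inmsg, source);

    MPI_Finalize();
    return 0;
}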
Example #4
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors;
    double  *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    /* Alloc_mem is not required for the origin buffers for RMA operations - 
       just for the Win_create memory */
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    if (rank == 0)
        if (verbose) printf("MPI RMA Strided Put Test:\n");

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf  + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided put operations */

    for (i = 0; i < ITERATIONS; i++) {
      MPI_Aint idx_loc[SUB_YDIM];
      int idx_rem[SUB_YDIM];
      int blk_len[SUB_YDIM];
      MPI_Datatype src_type, dst_type;

      void *base_ptr = dst_buf;
      MPI_Aint base_int;

      MPI_Get_address(base_ptr, &base_int);

      if (rank == 0)
        if (verbose) printf(" + iteration %d\n", i);

      for (j = 0; j < SUB_YDIM; j++) {
        MPI_Get_address(&src_buf[j*XDIM], &idx_loc[j]);
        idx_loc[j] = idx_loc[j] - base_int;
        idx_rem[j] = j*XDIM*sizeof(double);
        blk_len[j] = SUB_XDIM*sizeof(double);
      }

      MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type);
      MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM*sizeof(double), idx_rem, MPI_BYTE, &dst_type);

      MPI_Type_commit(&src_type);
      MPI_Type_commit(&dst_type);

      MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
      MPI_Put(base_ptr, 1, src_type, peer, 0, 1, dst_type, buf_win);
      MPI_Win_unlock(peer, buf_win);

      MPI_Type_free(&src_type);
      MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = (1.0 + ((rank+nranks-1)%nranks));
        if (fabs(actual - expected) > 1e-10) {
          SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual); );
          errors++;
          fflush(stdout);
        }
      }
    }
    MPI_Win_unlock(rank, buf_win);

    /* clean up: free the window and the buffers, report the error count */
    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);
    MPI_Free_mem(dst_buf);

    MTest_Finalize(errors);
    MPI_Finalize();
    return 0;
}
Example #5
void FieldStatic::finalize() {
    MPI_Barrier(MPI_COMM_WORLD);
}
Example #6
double timeStepper::computeDt(int &numReads, int &numWrites)
{
  // Time step control
  array minSpeedTemp,maxSpeedTemp;
  array minSpeed,maxSpeed;
  elemOld->computeMinMaxCharSpeeds(directions::X1,
                                   minSpeedTemp, maxSpeedTemp,
                                   numReads,numWrites
                                  );
  minSpeedTemp = minSpeedTemp/XCoords->dX1;
  maxSpeedTemp = maxSpeedTemp/XCoords->dX1;
  maxSpeed     = af::max(maxSpeedTemp,af::abs(minSpeedTemp));

  if(params::dim>1)
  {
    elemOld->computeMinMaxCharSpeeds(directions::X2,
                                     minSpeedTemp, maxSpeedTemp,
                                     numReads,numWrites
                                    );
    minSpeedTemp = minSpeedTemp/XCoords->dX2;
    maxSpeedTemp = maxSpeedTemp/XCoords->dX2;
    maxSpeed    += af::max(maxSpeedTemp,af::abs(minSpeedTemp));
  }

  if(params::dim>2)
  {
    elemOld->computeMinMaxCharSpeeds(directions::X3,
                                     minSpeedTemp, maxSpeedTemp,
                                     numReads,numWrites);
    minSpeedTemp = minSpeedTemp/XCoords->dX3;
    maxSpeedTemp = maxSpeedTemp/XCoords->dX3;
    maxSpeed    += af::max(maxSpeedTemp,af::abs(minSpeedTemp));
  }
  array maxInvDt_af = af::max(af::max(af::max(maxSpeed,2),1),0);
  double maxInvDt = maxInvDt_af.host<double>()[0];

  /* Use MPI to find the maximum inverse timestep (which gives the minimum dt) over all processors */
  if (world_rank == 0) 
  {
    double temp; 
    for(int i=1;i<world_size;i++)
    {
      MPI_Recv(&temp, 1, MPI_DOUBLE, i, i, PETSC_COMM_WORLD,MPI_STATUS_IGNORE);
      if( maxInvDt < temp)
      {
        maxInvDt = temp;
      }
    }
  }
  else
  {
    MPI_Send(&maxInvDt, 1, MPI_DOUBLE, 0, world_rank, PETSC_COMM_WORLD);
  }
  MPI_Barrier(PETSC_COMM_WORLD);
  MPI_Bcast(&maxInvDt,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
  MPI_Barrier(PETSC_COMM_WORLD);
  
  double newDt = params::CourantFactor/maxInvDt;
    
  if (newDt > params::maxDtIncrement*dt)
  {
    newDt = params::maxDtIncrement*dt;
  }
  dt = newDt;

  return dt;
}
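The manual gather of per-process maxima on rank 0 followed by MPI_Bcast above can be collapsed into a single collective. A minimal standalone sketch (with a placeholder local value instead of the real maxInvDt computation) using MPI_Allreduce with MPI_MAX:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank;
    double maxInvDt, globalMaxInvDt;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    maxInvDt = 1.0 + rank;   /* stand-in for the locally computed value */

    /* every rank ends up with the global maximum; no explicit broadcast needed */
    MPI_Allreduce(&maxInvDt, &globalMaxInvDt, 1, MPI_DOUBLE, MPI_MAX,
                  MPI_COMM_WORLD);

    printf("rank %d: global max inverse dt = %g\n", rank, globalMaxInvDt);

    MPI_Finalize();
    return 0;
}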
Example #7
int main (int argc, char **argv)
{
#ifdef VSG_HAVE_MPI
  VsgPRTreeParallelConfig pconfig = {{NULL,}};
#endif

  VsgVector3d lbound = {-TR, -TR, -TR};
  VsgVector3d ubound = {TR, TR, TR};
  VsgPRTree3d *prtree;
  AranSolver3d *solver;
  int ret = 0;
  GTimer *timer = NULL;

#ifdef VSG_HAVE_MPI
  MPI_Init (&argc, &argv);

  MPI_Comm_size (MPI_COMM_WORLD, &sz);
  MPI_Comm_rank (MPI_COMM_WORLD, &rk);
#endif

  aran_init();

  parse_args (argc, argv);

#ifdef VSG_HAVE_MPI
  pconfig.communicator = MPI_COMM_WORLD;

  pconfig.point = point_accum_vtable;

  aran_development3d_vtable_init (&pconfig.node_data, 0, order);
#endif

  /* points = g_ptr_array_new (); */

  if (check)
    {
      _cp_size = MAX (np, 128);
      check_points = g_malloc0 (_cp_size * sizeof (PointAccum));
    }

  prtree =
    vsg_prtree3d_new_full (&lbound, &ubound,
                            (VsgPoint3dLocFunc) vsg_vector3d_vector3d_locfunc,
                            (VsgPoint3dDistFunc) vsg_vector3d_dist,
                            (VsgRegion3dLocFunc) NULL, maxbox);

  solver = aran_solver3d_new (prtree, ARAN_TYPE_DEVELOPMENT3D,
                              aran_development3d_new (0, order),
                              (AranZeroFunc) aran_development3d_set_zero);

#ifdef VSG_HAVE_MPI
  aran_solver3d_set_parallel (solver, &pconfig);
#endif

  if (virtual_maxbox != 0)
    aran_solver3d_set_nf_isleaf (solver, _nf_isleaf_virtual_maxbox,
                                 &virtual_maxbox);

  aran_solver3d_set_functions (solver,
                               (AranParticle2ParticleFunc3d) p2p,
                               (AranParticle2MultipoleFunc3d) p2m,
                               m2m,
                               m2l,
                               l2l,
                               (AranLocal2ParticleFunc3d) l2p);

  if (semifar_threshold < G_MAXUINT)
    {
      aran_solver3d_set_functions_full (solver,
                                        (AranParticle2ParticleFunc3d) p2p,
                                        (AranParticle2MultipoleFunc3d) p2m,
                                        m2m,
                                        m2l,
                                        l2l,
                                        (AranLocal2ParticleFunc3d) l2p,
                                        (AranParticle2LocalFunc3d) p2l,
                                        (AranMultipole2ParticleFunc3d) m2p,
                                        semifar_threshold);

      if (semifar_threshold == 0)
        {
          PointAccum p1 = {{0.1, 0.1, 0.1}, 0.1, {0., 0., 0.}, 0};
          PointAccum p2 = {{-0.1, -0.1, -0.1}, 0.1, {0., 0., 0.}, 1};

          /* compute operators timings to be able to compute optimal solver parameters */
          aran_solver3d_profile_operators (solver, (AranParticleInitFunc3d) point_accum_clear_accum,
                                           &p1, &p2);

          /* alternatively, we could get timings from profile databases */
          /* aran_profile_db_read_file ("./profiledb-newtonfield3.ini", NULL); */
          /* aran_solver3d_db_profile_operators (solver, (gdouble) order); */

        }
      
    }

  if (_hilbert)
    {
      /* configure for hilbert curve order traversal */
      aran_solver3d_set_children_order_hilbert (solver);
    }

  if (_verbose)
    {
      g_printerr ("%d : fill begin\n", rk);
      g_printerr ("%d : memory peak1 count = %u\n", rk, getpeak(0));


#ifdef VSG_HAVE_MPI
      MPI_Barrier (MPI_COMM_WORLD);
#endif

      timer = g_timer_new ();
    }

  _fill (solver);

  if (_verbose)
    {
      g_printerr ("%d : fill elapsed=%f seconds\n", rk,
                  g_timer_elapsed (timer, NULL));

      g_printerr ("%d : tree depth count = %d\n", rk,
                  aran_solver3d_depth (solver));

      g_printerr ("%d : particle count=%d\n", rk,
                  aran_solver3d_point_count (solver));

      g_timer_destroy (timer);
  /* g_mem_profile(); */
    }

  if (_verbose)
    {
      g_printerr ("%d : solve begin\n", rk);
      g_printerr ("%d : memory peak2 count = %u\n", rk, getpeak(0));


#ifdef VSG_HAVE_MPI
      MPI_Barrier (MPI_COMM_WORLD);
#endif

      timer = g_timer_new ();
    }

  aran_solver3d_solve (solver);

  if (_verbose)
    {
#ifdef VSG_HAVE_MPI
      MPI_Barrier (MPI_COMM_WORLD);
#endif

      g_printerr ("%d : solve ok elapsed=%f seconds\n", rk,
                  g_timer_elapsed (timer, NULL));
      g_printerr ("%d : memory peak3 count = %u\n", rk, getpeak(0));


      g_timer_destroy (timer);

      {
        glong zero_count, p2p_count, p2m_count, m2m_count;
        glong m2l_count, l2l_count, l2p_count, p2l_count, m2p_count;
        glong p2p_remote_count, m2l_remote_count;

        aran_solver3d_get_stats (solver, &zero_count,
                                 &p2p_count, &p2m_count,
                                 &m2m_count, &m2l_count,
                                 &l2l_count, &l2p_count,
                                 &p2l_count, &m2p_count,
                                 &p2p_remote_count,
                                 &m2l_remote_count);

        g_printerr ("%d : zero count=%ld\n", rk, zero_count);
        g_printerr ("%d : p2p count=%ld\n", rk, p2p_count);
        g_printerr ("%d : p2p remote count=%ld\n", rk, p2p_remote_count);
        g_printerr ("%d : p2m count=%ld\n", rk, p2m_count);
        g_printerr ("%d : m2m count=%ld\n", rk, m2m_count);
        g_printerr ("%d : m2l count=%ld\n", rk, m2l_count);
        g_printerr ("%d : m2l remote count=%ld\n", rk, m2l_remote_count);
        g_printerr ("%d : l2l count=%ld\n", rk, l2l_count);
        g_printerr ("%d : l2p count=%ld\n", rk, l2p_count);
        g_printerr ("%d : p2l count=%ld\n", rk, p2l_count);
        g_printerr ("%d : m2p count=%ld\n", rk, m2p_count);
      }
    }

  if (_write)
    {
      gchar fn[1024];
      FILE *f;

      g_sprintf (fn, "tree%03d.txt", rk);
      f = fopen (fn, "w");
      vsg_prtree3d_write (prtree, f);
      fclose (f);

      _tree_write (prtree, "solv");
      _vtp_tree_write (solver, "solv");
    }

  if (_save_fma_filename != NULL)
    {
      FILE *file = fopen (_save_fma_filename, "w");
      aran_solver3d_write_fma (solver, file);
      fclose (file);
    }

  if (check)
    {
      guint64 i, j;

      if (sz == 1)
        {
          for (i=0; i<np; i ++)
            {
              PointAccum *pi = &check_points[i];

              for (j=0; j<np; j ++)
                {
                  if (j != i)
                    {
                      PointAccum *pj = &check_points[j];
                      p2p_one_way (pi, pj);
                    }
                }
            }
        }
      else
        check_parallel_points (solver);

      aran_solver3d_foreach_point (solver, (GFunc) check_point_field, &ret);

      if (_verbose)
        g_printerr ("%d : max err = %e\n", rk, maxerr);

      g_free (check_points);
    }

  aran_solver3d_free (solver);

#ifdef VSG_HAVE_MPI
  aran_development3d_vtable_clear (&pconfig.node_data);
#endif

  /* g_ptr_array_free (points, TRUE); */

  if (_load_file != NULL) g_free (_load_file);

#ifdef VSG_HAVE_MPI
  MPI_Finalize ();
#endif

  return ret;
}
Example #8
File: t_mpi.c  Project: Len3d/appleseed
static int
test_mpio_special_collective(char *filename)
{
    int  mpi_size, mpi_rank;
    MPI_File fh;
    MPI_Datatype etype,buftype,filetype;
    char mpi_err_str[MPI_MAX_ERROR_STRING];
    int  mpi_err_strlen;
    int  mpi_err;
    char writedata[2*DIMSIZE];
    char *buf;
    int  i;
    int  count,bufcount;
    int blocklens[2];
    MPI_Aint offsets[2];
    MPI_Offset  mpi_off;
    MPI_Status  mpi_stat;
    int  retcode;

    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    retcode = 0;

    /* create MPI data type */
    etype = MPI_BYTE;
    if(mpi_rank == 0 || mpi_rank == 1) {
        count = DIMSIZE;
        bufcount = 1;
    }
    else {
        count = 0;
        bufcount = 0;
    }

    blocklens[0] = count;
    offsets[0] = mpi_rank*count;
    blocklens[1] = count;
    offsets[1] = (mpi_size+mpi_rank)*count;

    if(count !=0) {
      if((mpi_err= MPI_Type_hindexed(2,blocklens,offsets,etype,&filetype))
       != MPI_SUCCESS){
      	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_contiguous failed (%s)\n", mpi_err_str);
	return 1;
      }

      if((mpi_err=MPI_Type_commit(&filetype))!=MPI_SUCCESS){
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
	return 1;
      }


      if((mpi_err= MPI_Type_hindexed(2,blocklens,offsets,etype,&buftype))
       != MPI_SUCCESS){
      	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_contiguous failed (%s)\n", mpi_err_str);
	return 1;
      }

      if((mpi_err=MPI_Type_commit(&buftype))!=MPI_SUCCESS){
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
	return 1;
      }
     }
     else {

       filetype = MPI_BYTE;
       buftype  = MPI_BYTE;
     }

   /* Open a file */
    if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename,
	    MPI_MODE_RDWR | MPI_MODE_CREATE ,
	    MPI_INFO_NULL, &fh))
	    != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_open failed (%s)\n", mpi_err_str);
	return 1;
    }

    /* each process writes some data */
    for (i=0; i < 2*DIMSIZE; i++)
	writedata[i] = mpi_rank*DIMSIZE + i;


     mpi_off = 0;
    if((mpi_err = MPI_File_set_view(fh, mpi_off, MPI_BYTE, filetype, "native", MPI_INFO_NULL))
        != MPI_SUCCESS) {
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_set_view failed (%s)\n", mpi_err_str);
	return 1;
    }

    buf   = writedata;
    if ((mpi_err = MPI_File_write_at_all(fh, mpi_off, buf, bufcount, buftype,
	    &mpi_stat))
	    != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_write_at offset(%ld), bytes (%d), failed (%s)\n",
		(long) mpi_off, bufcount, mpi_err_str);
	return 1;
    };

     if ((mpi_err = MPI_File_close(&fh))
	    != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_close failed. \n");
	return 1;
    };

    mpi_err = MPI_Barrier(MPI_COMM_WORLD);
#ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS
    if(retcode != 0) {
	if(mpi_rank == 0) {
	    printf("special collective IO is NOT working at this platform\n");
	    printf("Go back to hdf5/config and find the corresponding\n");
	    printf("configure-specific file (for example, powerpc-ibm-aix5.x) and add\n");
	    printf("hdf5_cv_mpi_special_collective_io_works=${hdf5_cv_mpi_special_collective_io_works='no'}\n");
	    printf(" at the end of the file.\n");
	    printf(" Please report to [email protected] about this problem.\n");
	}
	retcode = 1;
    }
#else
    if(retcode == 0) {
	if(mpi_rank == 0) {
	    printf(" This is NOT an error, What it really says is\n");
	    printf("special collective IO is WORKING at this platform\n");
	    printf(" Go back to hdf5/config and find the corresponding \n");
	    printf(" configure-specific file (for example, powerpc-ibm-aix5.x) and delete the line\n");
	    printf("hdf5_cv_mpi_special_collective_io_works=${hdf5_cv_mpi_special_collective_io_works='no'}\n");
	    printf(" at the end of the file.\n");
	    printf("Please report to [email protected] about this problem.\n");
	}
	retcode = 1;
    }
#endif
    return retcode;
}
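MPI_Type_hindexed, used above to build the file and buffer types, is the deprecated MPI-1 spelling (removed in MPI-3); MPI_Type_create_hindexed takes the same arguments. A minimal standalone sketch (hypothetical four-byte blocks) that builds an equivalent two-block byte layout:

#include <mpi.h>

int main(int argc, char *argv[])
{
    int rank, size;
    int blocklens[2];
    MPI_Aint offsets[2];
    MPI_Datatype filetype;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* two blocks of 4 bytes each, laid out per rank as in the test above */
    blocklens[0] = blocklens[1] = 4;
    offsets[0] = (MPI_Aint)rank * 4;
    offsets[1] = (MPI_Aint)(size + rank) * 4;

    MPI_Type_create_hindexed(2, blocklens, offsets, MPI_BYTE, &filetype);
    MPI_Type_commit(&filetype);
    /* ... use the type as a file view, as in the test above ... */
    MPI_Type_free(&filetype);

    MPI_Finalize();
    return 0;
}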
Example #9
	void system::set_problem(const bool init) 
	{
		if (myproc == 0)
			fprintf(stderr, " ********* Setting up MHD Turbulence ************* \n");

		const int reserve_n = (int)(1.25*local_n);
		U.reserve(reserve_n);
		dU.reserve(reserve_n);
		Wgrad.reserve(reserve_n);

		U.resize(local_n);
		dU.resize(local_n);
		Wgrad.resize(local_n);


		gamma_gas = 1.0;
		courant_no = 0.4;

		for (int i = 0; i < local_n; i++) 
		{
			assert(U[i][Fluid::DENS] > 0.0);
			U[i][Fluid::PSI ] = 0.0;

			for (int k = 0 ; k < Fluid::NSCALARS; k++)
				U[i].scal(k) = 1.0;

			dU[i] = Fluid(0.0);
			Wgrad[i] = 0.0;
			for (int k = 0; k < Fluid::NFLUID; k++)
				Wgrad[i].m[k] = U[i][k];
			U[i] = U[i].to_conservative(cells[i].Volume);
			ptcl[i].Volume = cells[i].Volume;
		}
		entropy_scalar = -1;
		isoeos_flag = true;

		MPI_Barrier(MPI_COMM_WORLD);
		if (myproc == 0)
			fprintf(stderr , " pvel ... \n");

		get_active_ptcl(true);


		MPI_Barrier(MPI_COMM_WORLD);
		if (myproc == 0)
			fprintf(stderr , " primitives ... \n");

		exchange_primitive_and_wdot();


		MPI_Barrier(MPI_COMM_WORLD);
		compute_pvel();
		exchange_pvel();

		MPI_Barrier(MPI_COMM_WORLD);
		if (myproc == 0)
			fprintf(stderr , " tgradients ... \n");
		compute_tgradient();

		if (myproc == 0)
			fprintf(stderr , " timestep... \n");
		compute_timesteps(true);
		for (int i = 0; i < local_n; i++)
			ptcl[i].rung[0] += 3;

		all_active = true;
		scheduler.flush_list();
		for (int i = 0; i < local_n; i++)
			scheduler.push_particle(i, (int)ptcl[i].rung[0]);

		MPI_Barrier(MPI_COMM_WORLD);
		if (!eulerian)
			clear_mesh();

		if (myproc == 0) fprintf(stderr, " proc= %d: complete problem setup \n", myproc);
		MPI_Barrier(MPI_COMM_WORLD);


	}
Example #10
File: t_mpi.c  Project: Len3d/appleseed
static int
test_mpio_1wMr(char *filename, int special_request)
{
    char hostname[128];
    int  mpi_size, mpi_rank;
    MPI_File fh;
    char mpi_err_str[MPI_MAX_ERROR_STRING];
    int  mpi_err_strlen;
    int  mpi_err;
    unsigned char writedata[DIMSIZE], readdata[DIMSIZE];
    unsigned char expect_val;
    int  i, irank;
    int  nerrs = 0;		/* number of errors */
    int  atomicity;
    MPI_Offset  mpi_off;
    MPI_Status  mpi_stat;

    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    if (MAINPROCESS && VERBOSE_MED){
        printf("Testing one process writes, all processes read.\n");
	printf("Using %d processes accessing file %s\n", mpi_size, filename);
        printf("    (Filename can be specified via program argument)\n");
    }

    /* show the hostname so that we can tell where the processes are running */
    if (VERBOSE_DEF){
	if (gethostname(hostname, 128) < 0){
	    PRINTID;
	    printf("gethostname failed\n");
	    return 1;
	}
	PRINTID;
	printf("hostname=%s\n", hostname);
    }

    /* Delete any old file in order to start anew. */
    /* Must delete because MPI_File_open does not have a Truncate mode. */
    /* Don't care if it has error. */
    MPI_File_delete(filename, MPI_INFO_NULL);
    MPI_Barrier(MPI_COMM_WORLD);	/* prevent race condition */

    if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename,
	    MPI_MODE_RDWR | MPI_MODE_CREATE ,
	    MPI_INFO_NULL, &fh))
	    != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	PRINTID;
	printf("MPI_File_open failed (%s)\n", mpi_err_str);
	return 1;
    }

if (special_request & USEATOM){
    /* ==================================================
     * Set atomicity to true (1).  A POSIX compliant filesystem
     * should not need this.
     * ==================================================*/
    if ((mpi_err = MPI_File_get_atomicity(fh, &atomicity)) != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	PRINTID;
	printf("MPI_File_get_atomicity failed (%s)\n", mpi_err_str);
    }
    if (VERBOSE_HI)
	printf("Initial atomicity = %d\n", atomicity);
    if ((mpi_err = MPI_File_set_atomicity(fh, 1)) != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	PRINTID;
	printf("MPI_File_set_atomicity failed (%s)\n", mpi_err_str);
    }
    if ((mpi_err = MPI_File_get_atomicity(fh, &atomicity)) != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	PRINTID;
	printf("MPI_File_get_atomicity failed (%s)\n", mpi_err_str);
    }
    if (VERBOSE_HI)
	printf("After set_atomicity atomicity = %d\n", atomicity);
}

    /* This barrier is not necessary but do it anyway. */
    MPI_Barrier(MPI_COMM_WORLD);
    if (VERBOSE_HI){
	PRINTID;
	printf("between MPI_Barrier and MPI_File_write_at\n");
    }

    /* ==================================================
     * Each process calculates what to write but
     * only process irank(0) writes.
     * ==================================================*/
    irank=0;
    for (i=0; i < DIMSIZE; i++)
	writedata[i] = irank*DIMSIZE + i;
    mpi_off = irank*DIMSIZE;

    /* Only one process writes */
    if (mpi_rank==irank){
	if (VERBOSE_HI){
	    PRINTID; printf("wrote %d bytes at %ld\n", DIMSIZE, (long)mpi_off);
	}
	if ((mpi_err = MPI_File_write_at(fh, mpi_off, writedata, DIMSIZE,
			MPI_BYTE, &mpi_stat))
		!= MPI_SUCCESS){
	    MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	    PRINTID;
	    printf("MPI_File_write_at offset(%ld), bytes (%d), failed (%s)\n",
		    (long) mpi_off, DIMSIZE, mpi_err_str);
	    return 1;
	};
    };

    /* Bcast the return code and */
    /* make sure all writing is done before reading. */
    MPI_Bcast(&mpi_err, 1, MPI_INT, irank, MPI_COMM_WORLD);
    if (VERBOSE_HI){
	PRINTID;
	printf("MPI_Bcast: mpi_err = %d\n", mpi_err);
    }

if (special_request & USEFSYNC){
    /* ==================================================
     * Do a file sync.  A POSIX compliant filesystem
     * should not need this.
     * ==================================================*/
    if (VERBOSE_HI)
	printf("Apply MPI_File_sync\n");
    /* call file_sync to force the write out */
    if ((mpi_err = MPI_File_sync(fh)) != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	PRINTID;
	printf("MPI_File_sync failed (%s)\n", mpi_err_str);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    /* call file_sync to force the write out */
    if ((mpi_err = MPI_File_sync(fh)) != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	PRINTID;
	printf("MPI_File_sync failed (%s)\n", mpi_err_str);
    }
}

    /* This barrier is not necessary because the Bcast or File_sync above */
    /* should take care of it.  Do it anyway. */
    MPI_Barrier(MPI_COMM_WORLD);
    if (VERBOSE_HI){
	PRINTID;
	printf("after MPI_Barrier\n");
    }

    /* ==================================================
     * Each process reads what process 0 wrote and verify.
     * ==================================================*/
    irank=0;
    mpi_off = irank*DIMSIZE;
    if ((mpi_err = MPI_File_read_at(fh, mpi_off, readdata, DIMSIZE, MPI_BYTE,
	    &mpi_stat))
	    != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	PRINTID;
	printf("MPI_File_read_at offset(%ld), bytes (%d), failed (%s)\n",
		(long) mpi_off, DIMSIZE, mpi_err_str);
	return 1;
    };
    for (i=0; i < DIMSIZE; i++){
	expect_val = irank*DIMSIZE + i;
	if (readdata[i] != expect_val){
	    PRINTID;
	    printf("read data[%d:%d] got %02x, expect %02x\n", irank, i,
		    readdata[i], expect_val);
	    nerrs++;
	}
    }

    MPI_File_close(&fh);

    if (VERBOSE_HI){
	PRINTID;
	printf("%d data errors detected\n", nerrs);
    }

    mpi_err = MPI_Barrier(MPI_COMM_WORLD);
    return nerrs;
}
Example #11
File: t_mpi.c  Project: Len3d/appleseed
static int test_mpio_derived_dtype(char *filename) {

    MPI_File fh;
    char mpi_err_str[MPI_MAX_ERROR_STRING];
    int  mpi_err_strlen;
    int  mpi_err;
    int  i;
    int  nerrors = 0;		/* number of errors */
    MPI_Datatype  etype,filetype;
    MPI_Datatype  adv_filetype,bas_filetype[2];
    MPI_Datatype  etypenew, filetypenew;
    MPI_Offset    disp;
    MPI_Status    Status;
    MPI_Aint      adv_disp[2];
    MPI_Aint      offsets[1];
    int           blocklens[1],adv_blocklens[2];
    int           count,outcount;
    int           retcode;

    int mpi_rank,mpi_size;

    char          buf[3],outbuf[3] = {0};

    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    retcode = 0;
    for(i=0;i<3;i++)
      buf[i] = i+1;


    if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename,
				 MPI_MODE_RDWR | MPI_MODE_CREATE,
				 MPI_INFO_NULL, &fh))
	    != MPI_SUCCESS){
	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_open failed (%s)\n", mpi_err_str);
	return 1;
    }

    disp  = 0;
    etype = MPI_BYTE;

    count = 1;
    blocklens[0] = 1;
    offsets[0]   = 0;

    if((mpi_err= MPI_Type_hindexed(count,blocklens,offsets,MPI_BYTE,&filetype))
       != MPI_SUCCESS){
      	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_contiguous failed (%s)\n", mpi_err_str);
	return 1;
    }

    if((mpi_err=MPI_Type_commit(&filetype))!=MPI_SUCCESS){
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
	return 1;
    }

    count = 1;
    blocklens[0]=1;
    offsets[0] = 1;
    if((mpi_err= MPI_Type_hindexed(count,blocklens,offsets,MPI_BYTE,&filetypenew))
       != MPI_SUCCESS){
      	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_contiguous failed (%s)\n", mpi_err_str);
	return 1;
    }

    if((mpi_err=MPI_Type_commit(&filetypenew))!=MPI_SUCCESS){
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
	return 1;
    }

    outcount         = 2;
    adv_blocklens[0] = 1;
    adv_blocklens[1] = 1;
    adv_disp[0]      = 0;
    adv_disp[1]      = 1;
    bas_filetype[0]  = filetype;
    bas_filetype[1]  = filetypenew;

    if((mpi_err= MPI_Type_struct(outcount,adv_blocklens,adv_disp,bas_filetype,&adv_filetype))
       != MPI_SUCCESS){
      	MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_struct failed (%s)\n", mpi_err_str);
	return 1;
    }
    if((mpi_err=MPI_Type_commit(&adv_filetype))!=MPI_SUCCESS){
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
	return 1;
    }


    if((mpi_err = MPI_File_set_view(fh,disp,etype,adv_filetype,"native",MPI_INFO_NULL))!= MPI_SUCCESS){
      MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_set_view failed (%s)\n", mpi_err_str);
	return 1;
    }

    if((mpi_err = MPI_File_write(fh,buf,3,MPI_BYTE,&Status))!= MPI_SUCCESS){
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_write failed (%s)\n", mpi_err_str);
	return 1;
    }


    if((mpi_err = MPI_File_close(&fh)) != MPI_SUCCESS){
       MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_close failed (%s)\n", mpi_err_str);
	return 1;
    }


    if((mpi_err = MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&fh)) != MPI_SUCCESS){
       MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_open failed (%s)\n", mpi_err_str);
	return 1;
    }

    if((mpi_err = MPI_File_set_view(fh,0,MPI_BYTE,MPI_BYTE,"native",MPI_INFO_NULL))!= MPI_SUCCESS){
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_set_view failed (%s)\n", mpi_err_str);
	return 1;
    }
    if((mpi_err = MPI_File_read(fh,outbuf,3,MPI_BYTE,&Status))!=MPI_SUCCESS){
      MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
      printf("MPI_File_read failed (%s)\n", mpi_err_str);
      return 1;
    }

    if(outbuf[2]==2) {
       retcode = 0;
    }
    else {
/*      if(mpi_rank == 0) {
       printf("complicated derived datatype is NOT working at this platform\n");
       printf("go back to hdf5/config and find the corresponding\n");
       printf("configure-specific file and change ?????\n");
      }
*/
       retcode = -1;
   }

    if((mpi_err = MPI_File_close(&fh)) != MPI_SUCCESS){
       MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
	printf("MPI_File_close failed (%s)\n", mpi_err_str);
	return 1;
    }


    mpi_err = MPI_Barrier(MPI_COMM_WORLD);
#ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
    if(retcode == -1) {
	if(mpi_rank == 0) {
	    printf("Complicated derived datatype is NOT working at this platform\n");
	    printf("Go back to hdf5/config and find the corresponding\n");
	    printf("configure-specific file (for example, powerpc-ibm-aix5.x) and add\n");
	    printf("hdf5_cv_mpi_complex_derived_datatype_works=${hdf5_cv_mpi_complex_derived_datatype-works='no'}\n");
	    printf(" at the end of the file.\n");
	    printf(" Please report to [email protected] about this problem.\n");
	}
	retcode = 1;
    }
#else
    if(retcode == 0) {
	if(mpi_rank == 0) {
	    printf(" This is NOT an error, What it really says is\n");
	    printf("Complicated derived datatype is WORKING at this platform\n");
	    printf(" Go back to hdf5/config and find the corresponding \n");
	    printf(" configure-specific file (for example, powerpc-ibm-aix5.x) and delete the line\n");
	    printf("hdf5_cv_mpi_complex_derived_datatype_works=${hdf5_cv_mpi_complex_derived_datatype-works='no'}\n");
	    printf(" at the end of the file.\n");
	    printf("Please report to [email protected] about this problem.\n");
	}
	retcode = 1;
    }
    if(retcode == -1) retcode = 0;
#endif
    return retcode;
}
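Similarly, MPI_Type_struct used above is the deprecated spelling of MPI_Type_create_struct, which takes the same arguments. A minimal standalone sketch (hypothetical one-byte members at displacements 0 and 1):

#include <mpi.h>

int main(int argc, char *argv[])
{
    int blocklens[2] = { 1, 1 };
    MPI_Aint disps[2] = { 0, 1 };
    MPI_Datatype types[2] = { MPI_BYTE, MPI_BYTE };
    MPI_Datatype combined;

    MPI_Init(&argc, &argv);

    MPI_Type_create_struct(2, blocklens, disps, types, &combined);
    MPI_Type_commit(&combined);
    /* ... set as a file view, as in the test above ... */
    MPI_Type_free(&combined);

    MPI_Finalize();
    return 0;
}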
Example #12
File: t_mpi.c  Project: Len3d/appleseed
static int
test_mpio_overlap_writes(char *filename)
{
    int mpi_size, mpi_rank;
    MPI_Comm comm;
    MPI_Info info = MPI_INFO_NULL;
    int color, mrc;
    MPI_File	fh;
    int i;
    int vrfyerrs, nerrs;
    unsigned char  buf[4093];		/* use some prime number for size */
    int bufsize = sizeof(buf);
    MPI_Offset  stride;
    MPI_Offset  mpi_off;
    MPI_Status  mpi_stat;


    if (VERBOSE_MED)
	printf("MPIO independent overlapping writes test on file %s\n",
	    filename);

    nerrs = 0;
    /* set up MPI parameters */
    MPI_Comm_size(MPI_COMM_WORLD,&mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);

    /* Need at least 2 processes */
    if (mpi_size < 2) {
	if (MAINPROCESS) {
	    printf("Need at least 2 processes to run MPIO test.\n");
	    printf(" -SKIP- \n");
	}
	return 0;
    }

    /* splits processes 0 to n-2 into one comm. and the last one into another */
    color = ((mpi_rank < (mpi_size - 1)) ? 0 : 1);
    mrc = MPI_Comm_split (MPI_COMM_WORLD, color, mpi_rank, &comm);
    VRFY((mrc==MPI_SUCCESS), "Comm_split succeeded");

    if (color==0){
	/* First n-1 processes (color==0) open a file and write it */
	mrc = MPI_File_open(comm, filename, MPI_MODE_CREATE|MPI_MODE_RDWR,
		info, &fh);
	VRFY((mrc==MPI_SUCCESS), "");

	stride = 1;
	mpi_off = mpi_rank*stride;
	while (mpi_off < MPIO_TEST_WRITE_SIZE){
	    /* make sure the write does not exceed the TEST_WRITE_SIZE */
	    if (mpi_off+stride > MPIO_TEST_WRITE_SIZE)
		stride = MPIO_TEST_WRITE_SIZE - mpi_off;

	    /* set data to some trivial pattern for easy verification */
	    for (i=0; i<stride; i++)
		buf[i] = (unsigned char)(mpi_off+i);
	    mrc = MPI_File_write_at(fh, mpi_off, buf, (int)stride, MPI_BYTE,
		    &mpi_stat);
	    VRFY((mrc==MPI_SUCCESS), "");

	    /* move the offset pointer to last byte written by all processes */
	    mpi_off += (mpi_size - 1 - mpi_rank) * stride;

	    /* Increase chunk size without exceeding buffer size. */
	    /* Then move the starting offset for next write. */
	    stride *= 2;
	    if (stride > bufsize)
		stride = bufsize;
	    mpi_off += mpi_rank*stride;
	}

	/* close file and free the communicator */
	mrc = MPI_File_close(&fh);
	VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");
	mrc = MPI_Comm_free(&comm);
	VRFY((mrc==MPI_SUCCESS), "MPI_Comm_free");

	/* sync with the other waiting processes */
	mrc = MPI_Barrier(MPI_COMM_WORLD);
	VRFY((mrc==MPI_SUCCESS), "Sync after writes");
    }else{
	/* last process waits till writes are done,
	 * then opens file to verify data.
	 */
	mrc = MPI_Barrier(MPI_COMM_WORLD);
	VRFY((mrc==MPI_SUCCESS), "Sync after writes");

	mrc = MPI_File_open(comm, filename, MPI_MODE_RDONLY,
		info, &fh);
	VRFY((mrc==MPI_SUCCESS), "");

	stride = bufsize;
	for (mpi_off=0; mpi_off < MPIO_TEST_WRITE_SIZE; mpi_off += bufsize){
	    /* make sure it does not read beyond end of data */
	    if (mpi_off+stride > MPIO_TEST_WRITE_SIZE)
		stride = MPIO_TEST_WRITE_SIZE - mpi_off;
	    mrc = MPI_File_read_at(fh, mpi_off, buf, (int)stride, MPI_BYTE,
		    &mpi_stat);
	    VRFY((mrc==MPI_SUCCESS), "");
	    vrfyerrs=0;
	    for (i=0; i<stride; i++){
		unsigned char expected;
		expected = (unsigned char)(mpi_off+i);
		if ((expected != buf[i]) &&
		    (vrfyerrs++ < MAX_ERR_REPORT || VERBOSE_MED)) {
			printf("proc %d: found data error at [%ld], expect %u, got %u\n",
			    mpi_rank, (long)(mpi_off+i), expected, buf[i]);
		}
	    }
	    if (vrfyerrs > MAX_ERR_REPORT && !VERBOSE_MED)
		printf("proc %d: [more errors ...]\n", mpi_rank);

	    nerrs += vrfyerrs;
	}

	/* close file and free the communicator */
	mrc = MPI_File_close(&fh);
	VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");
	mrc = MPI_Comm_free(&comm);
	VRFY((mrc==MPI_SUCCESS), "MPI_Comm_free");
    }

    /*
     * one more sync to ensure all processes have done reading
     * before ending this test.
     */
    mrc = MPI_Barrier(MPI_COMM_WORLD);
    VRFY((mrc==MPI_SUCCESS), "Sync before leaving test");
    return (nerrs);
}
Example #13
File: t_mpi.c  Project: Len3d/appleseed
/*
 * Verify that MPI_Offset exceeding 2**31 can be computed correctly.
 * Print any failure as information only, not as an error so that this
 * won't abort the remaining test or other separated tests.
 *
 * Test if MPIO can write file from under 2GB to over 2GB and then
 * from under 4GB to over 4GB.
 * Each process writes 1MB in round robin fashion.
 * Then reads the file back in by reverse order, that is process 0
 * reads the data of process n-1 and vice versa.
 */
static int
test_mpio_gb_file(char *filename)
{
    int mpi_size, mpi_rank;
    MPI_Info info = MPI_INFO_NULL;
    int mrc;
    MPI_File	fh;
    int i, j, n;
    int vrfyerrs;
    int writerrs;		/* write errors */
    int nerrs;
    int ntimes;			/* how many times */
    char  *buf = NULL;
    char  expected;
    MPI_Offset  size;
    MPI_Offset  mpi_off;
    MPI_Offset  mpi_off_old;
    MPI_Status  mpi_stat;
    struct stat stat_buf;
    int is_signed, sizeof_mpi_offset;

    nerrs = 0;
    /* set up MPI parameters */
    MPI_Comm_size(MPI_COMM_WORLD,&mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);

    if (VERBOSE_MED)
        printf("MPI_Offset range test\n");

    /* figure out the signedness and size of MPI_Offset */
    mpi_off = 0;
    is_signed = ((MPI_Offset)(mpi_off - 1)) < 0;
    sizeof_mpi_offset = (int)(sizeof(MPI_Offset));

    /*
     * Verify the sizeof MPI_Offset and correctness of handling multiple GB
     * sizes.
     */
    if (MAINPROCESS){			/* only process 0 needs to check it*/
	printf("MPI_Offset is %s %d bytes integeral type\n",
	    is_signed ? "signed" : "unsigned", (int)sizeof(MPI_Offset));
	if (sizeof_mpi_offset <= 4 && is_signed){
	    printf("Skipped 2GB range test "
		    "because MPI_Offset cannot support it\n");
	}else {
	    /* verify correctness of assigning 2GB sizes */
	    mpi_off = 2 * 1024 * (MPI_Offset)MB;
	    INFO((mpi_off>0), "2GB OFFSET assignment no overflow");
	    INFO((mpi_off-1)==TWO_GB_LESS1, "2GB OFFSET assignment succeed");

	    /* verify correctness of increasing from below 2 GB to above 2GB */
	    mpi_off = TWO_GB_LESS1;
	    for (i=0; i < 3; i++){
		mpi_off_old = mpi_off;
		mpi_off = mpi_off + 1;
		/* no overflow */
		INFO((mpi_off>0), "2GB OFFSET increment no overflow");
		/* correct inc. */
		INFO((mpi_off-1)==mpi_off_old, "2GB OFFSET increment succeed");
	    }
	}

	if (sizeof_mpi_offset <= 4){
	    printf("Skipped 4GB range test "
		    "because MPI_Offset cannot support it\n");
	}else {
	    /* verify correctness of assigning 4GB sizes */
	    mpi_off = 4 * 1024 * (MPI_Offset)MB;
	    INFO((mpi_off>0), "4GB OFFSET assignment no overflow");
	    INFO((mpi_off-1)==FOUR_GB_LESS1, "4GB OFFSET assignment succeed");

	    /* verify correctness of increasing from below 4 GB to above 4 GB */
	    mpi_off = FOUR_GB_LESS1;
	    for (i=0; i < 3; i++){
		mpi_off_old = mpi_off;
		mpi_off = mpi_off + 1;
		/* no overflow */
		INFO((mpi_off>0), "4GB OFFSET increment no overflow");
		/* correct inc. */
		INFO((mpi_off-1)==mpi_off_old, "4GB OFFSET increment succeed");
	    }
	}
    }

    /*
     * Verify if we can write to a file of multiple GB sizes.
     */
    if (VERBOSE_MED)
	printf("MPIO GB file test %s\n", filename);

    if (sizeof_mpi_offset <= 4){
	printf("Skipped GB file range test "
		"because MPI_Offset cannot support it\n");
    }else{
	buf = malloc(MB);
	VRFY((buf!=NULL), "malloc succeed");

	/* open a new file. Remove it first in case it exists. */
	/* Must delete because MPI_File_open does not have a Truncate mode. */
	/* Don't care if it has error. */
	MPI_File_delete(filename, MPI_INFO_NULL);
	MPI_Barrier(MPI_COMM_WORLD);	/* prevent race condition */

	mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE|MPI_MODE_RDWR,
		    info, &fh);
	VRFY((mrc==MPI_SUCCESS), "MPI_FILE_OPEN");

	printf("MPIO GB file write test %s\n", filename);

	/* instead of writing every byte of the file, we will just write
	 * some data around the 2 and 4 GB boundaries.  That should cover
	 * potential integer overflow and filesystem size limits.
	 */
	writerrs = 0;
	for (n=2; n <= 4; n+=2){
	    ntimes = GB/MB*n/mpi_size + 1;
	    for (i=ntimes-2; i <= ntimes; i++){
		mpi_off = (i*mpi_size + mpi_rank)*(MPI_Offset)MB;
		if (VERBOSE_MED)
		    HDfprintf(stdout,"proc %d: write to mpi_off=%016llx, %lld\n",
			mpi_rank, mpi_off, mpi_off);
		/* set data to some trivial pattern for easy verification */
		for (j=0; j<MB; j++)
		    *(buf+j) = i*mpi_size + mpi_rank;
		if (VERBOSE_MED)
		    HDfprintf(stdout,"proc %d: writing %d bytes at offset %lld\n",
			mpi_rank, MB, mpi_off);
		mrc = MPI_File_write_at(fh, mpi_off, buf, MB, MPI_BYTE, &mpi_stat);
		INFO((mrc==MPI_SUCCESS), "GB size file write");
		if (mrc!=MPI_SUCCESS)
		    writerrs++;
	    }
	}

	/* close file and free the communicator */
	mrc = MPI_File_close(&fh);
	VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");

	mrc = MPI_Barrier(MPI_COMM_WORLD);
	VRFY((mrc==MPI_SUCCESS), "Sync after writes");

	/*
	 * Verify if we can read the multiple GB file just created.
	 */
	/* open it again to verify the data written */
	/* but only if there was no write errors */
	printf("MPIO GB file read test %s\n", filename);
	if (errors_sum(writerrs)>0){
	    printf("proc %d: Skip read test due to previous write errors\n",
		mpi_rank);
	    goto finish;
	}
	mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, info, &fh);
	VRFY((mrc==MPI_SUCCESS), "");

	/* Only read back parts of the file that have been written. */
	for (n=2; n <= 4; n+=2){
	    ntimes = GB/MB*n/mpi_size + 1;
	    for (i=ntimes-2; i <= ntimes; i++){
		mpi_off = (i*mpi_size + (mpi_size - mpi_rank - 1))*(MPI_Offset)MB;
		if (VERBOSE_MED)
		    HDfprintf(stdout,"proc %d: read from mpi_off=%016llx, %lld\n",
			mpi_rank, mpi_off, mpi_off);
		mrc = MPI_File_read_at(fh, mpi_off, buf, MB, MPI_BYTE, &mpi_stat);
		INFO((mrc==MPI_SUCCESS), "GB size file read");
		expected = i*mpi_size + (mpi_size - mpi_rank - 1);
		vrfyerrs=0;
		for (j=0; j<MB; j++){
		    if ((*(buf+j) != expected) &&
			(vrfyerrs++ < MAX_ERR_REPORT || VERBOSE_MED)){
			    printf("proc %d: found data error at [%ld+%d], expect %d, got %d\n",
				mpi_rank, (long)mpi_off, j, expected, *(buf+j));
		    }
		}
		if (vrfyerrs > MAX_ERR_REPORT && !VERBOSE_MED)
		    printf("proc %d: [more errors ...]\n", mpi_rank);

		nerrs += vrfyerrs;
	    }
	}

	/* close file and free the communicator */
	mrc = MPI_File_close(&fh);
	VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");

	/*
	 * one more sync to ensure all processes have done reading
	 * before ending this test.
	 */
	mrc = MPI_Barrier(MPI_COMM_WORLD);
	VRFY((mrc==MPI_SUCCESS), "Sync before leaving test");

        /*
         * Check if MPI_File_get_size works correctly.  Some systems (only SGI Altix
         * Propack 4 so far) return wrong file size.  It can be avoided by reconfiguring
         * with "--disable-mpi-size".
         */
#ifdef H5_HAVE_MPI_GET_SIZE
	printf("Test if MPI_File_get_size works correctly with %s\n", filename);

	mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, info, &fh);
        VRFY((mrc==MPI_SUCCESS), "");

        if (MAINPROCESS){			/* only process 0 needs to check it*/
            mrc = MPI_File_get_size(fh, &size);
	    VRFY((mrc==MPI_SUCCESS), "");

            mrc=stat(filename, &stat_buf);
	    VRFY((mrc==0), "");

            /* Hopefully this casting is safe */
            if(size != (MPI_Offset)(stat_buf.st_size)) {
                printf("Warning: MPI_File_get_size doesn't return correct file size.  To avoid using it in the library, reconfigure and rebuild the library with --disable-mpi-size.\n");
            }
        }

	/* close file and free the communicator */
	mrc = MPI_File_close(&fh);
	VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");

	/*
	 * one more sync to ensure all processes have done reading
	 * before ending this test.
	 */
	mrc = MPI_Barrier(MPI_COMM_WORLD);
	VRFY((mrc==MPI_SUCCESS), "Sync before leaving test");
#else
        printf("Skipped testing MPI_File_get_size because it's disabled\n");
#endif
    }

finish:
    if (buf)
	HDfree(buf);
    return (nerrs);
}
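The 2 GB and 4 GB checks above hinge on one detail: the cast to MPI_Offset must happen before the multiplication, otherwise the product is evaluated in (typically 32-bit) int arithmetic and overflows. A minimal standalone sketch (MB_ is a local stand-in for the test's MB constant):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    const int MB_ = 1048576;                       /* 2^20 bytes */
    /* cast one operand first so the whole product is computed as MPI_Offset
       (safe as long as MPI_Offset itself is a 64-bit type) */
    MPI_Offset four_gb = 4 * 1024 * (MPI_Offset)MB_;

    MPI_Init(&argc, &argv);
    printf("sizeof(MPI_Offset)=%d, 4GB offset=%lld\n",
           (int)sizeof(MPI_Offset), (long long)four_gb);
    MPI_Finalize();
    return 0;
}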
Example #14
File: t_mpi.c  Project: Len3d/appleseed
int
main(int argc, char **argv)
{
    int mpi_size, mpi_rank;				/* mpi variables */
    int ret_code;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    /* Attempt to turn off atexit post processing so that in case errors
     * happen during the test and the process is aborted, it will not
     * hang in the atexit post processing, in which it may try to make MPI
     * calls.  By then, MPI calls may not work.
     */
    if (H5dont_atexit() < 0){
	printf("Failed to turn off atexit processing. Continue.\n", mpi_rank);
    };
    H5open();
    if (parse_options(argc, argv) != 0){
	if (MAINPROCESS)
	    usage();
	goto finish;
    }

    if (MAINPROCESS){
	printf("===================================\n");
	printf("MPI functionality tests\n");
	printf("===================================\n");
    }

    if (VERBOSE_MED)
	h5_show_hostname();

    fapl = H5Pcreate (H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);

    /* set alarm. */
    ALARM_ON;


    /*=======================================
     * MPIO 1 write Many read test
     *=======================================*/
    MPI_BANNER("MPIO 1 write Many read test...");
    ret_code = test_mpio_1wMr(filenames[0], USENONE);
    ret_code = errors_sum(ret_code);
    if (mpi_rank==0 && ret_code > 0){
	printf("***FAILED with %d total errors\n", ret_code);
	nerrors += ret_code;
    }

    /* test atomicity and file sync in high verbose mode only         */
    /* since they often hang when broken and PHDF5 does not use them. */
    if (VERBOSE_HI){
	MPI_BANNER("MPIO 1 write Many read test with atomicity...");
	ret_code = test_mpio_1wMr(filenames[0], USEATOM);
	ret_code = errors_sum(ret_code);
	if (mpi_rank==0 && ret_code > 0){
	    printf("***FAILED with %d total errors\n", ret_code);
	    nerrors += ret_code;
	}

	MPI_BANNER("MPIO 1 write Many read test with file sync...");
	ret_code = test_mpio_1wMr(filenames[0], USEFSYNC);
	ret_code = errors_sum(ret_code);
	if (mpi_rank==0 && ret_code > 0){
	    printf("***FAILED with %d total errors\n", ret_code);
	    nerrors += ret_code;
	}
    }


    /*=======================================
     * MPIO File size range test
     *=======================================*/
    MPI_BANNER("MPIO File size range test...");
    ret_code = test_mpio_gb_file(filenames[0]);
    ret_code = errors_sum(ret_code);
    if (mpi_rank==0 && ret_code > 0){
	printf("***FAILED with %d total errors\n", ret_code);
	nerrors += ret_code;
    }


    /*=======================================
     * MPIO independent overlapping writes
     *=======================================*/
    MPI_BANNER("MPIO independent overlapping writes...");
    ret_code = test_mpio_overlap_writes(filenames[0]);
    ret_code = errors_sum(ret_code);
    if (mpi_rank==0 && ret_code > 0){
	printf("***FAILED with %d total errors\n", ret_code);
	nerrors += ret_code;
    }

    /*=======================================
     * MPIO complicated derived datatype test
     *=======================================*/
    /* test_mpio_derived_dtype often hangs when it fails.
     * Do not run it if it is known NOT to work, unless explicitly asked
     * to run via high verbose mode.
     */
#ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
    MPI_BANNER("MPIO complicated derived datatype test...");
    ret_code = test_mpio_derived_dtype(filenames[0]);
#else
    if (VERBOSE_HI){
	MPI_BANNER("MPIO complicated derived datatype test...");
	ret_code = test_mpio_derived_dtype(filenames[0]);
    }else{
	MPI_BANNER("MPIO complicated derived datatype test SKIPPED.");
	ret_code = 0;	/* fake ret_code */
    }
#endif
    ret_code = errors_sum(ret_code);
    if (mpi_rank==0 && ret_code > 0){
	printf("***FAILED with %d total errors\n", ret_code);
	nerrors += ret_code;
    }

    /*=======================================
     * MPIO special collective IO  test
     *=======================================*/
    /* test_mpio_special_collective often hangs when it fails.
     * Do not run it if it is known NOT to work, unless explicitly asked
     * to run via high verbose mode.
     */
    if(mpi_size !=4){
      MPI_BANNER("MPIO special collective io test SKIPPED.");
      if(mpi_rank == 0){
        printf("Use FOUR processes to run this test\n");
        printf("If you still see the <test SKIPPED>, use <-vh> option to verify the test\n");
      }
      ret_code = 0;
      goto sc_finish;
    }

#ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS
    MPI_BANNER("MPIO special collective io test...");
    ret_code = test_mpio_special_collective(filenames[0]);

#else
    if (VERBOSE_HI){
	MPI_BANNER("MPIO special collective io test...");
	ret_code = test_mpio_special_collective(filenames[0]);
    }else{
	MPI_BANNER("MPIO special collective io test SKIPPED.");
	ret_code = 0;	/* fake ret_code */
    }
#endif

sc_finish:
    ret_code = errors_sum(ret_code);
    if (mpi_rank==0 && ret_code > 0){
	printf("***FAILED with %d total errors\n", ret_code);
	nerrors += ret_code;
    }


finish:
    /* make sure all processes are finished before final report, cleanup
     * and exit.
     */
    MPI_Barrier(MPI_COMM_WORLD);
    if (MAINPROCESS){		/* only process 0 reports */
	printf("===================================\n");
	if (nerrors){
	    printf("***MPI tests detected %d errors***\n", nerrors);
	}
	else{
	    printf("MPI tests finished with no errors\n");
	}
	printf("===================================\n");
    }

    /* turn off alarm */
    ALARM_OFF;

    h5_cleanup(FILENAME, fapl);
    H5close();

    /* MPI_Finalize must be called AFTER H5close which may use MPI calls */
    MPI_Finalize();

    /* cannot just return (nerrors) because the exit code is limited to 1 byte */
    return(nerrors!=0);
}
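The return at the end collapses the error count to 0 or 1 because a process exit status only carries its low 8 bits, so returning the raw count could read as success. A tiny stand-alone illustration with a hypothetical count (not part of the test):

#include <stdio.h>

int main(void)
{
    int nerrors = 256;                          /* hypothetical error count */
    /* exit statuses are reported modulo 256, so 256 alone would look like success */
    printf("raw=%d reported=%d\n", nerrors, nerrors & 0xff);
    return (nerrors != 0);                      /* any nonzero count -> status 1 */
}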
ForecastData* Init_ForecastData(char* fcst_filename,unsigned int string_size)
{
	FILE* inputfile = NULL;
	ForecastData* Forecaster;
	int errorcode,valsread;
	char end_char;
	unsigned int buff_size = string_size + 20;
	char* linebuffer = (char*) malloc(buff_size*sizeof(char));
	MPI_Barrier(MPI_COMM_WORLD);

	if(my_rank == 0)
	{
		//Open file
		inputfile = fopen(fcst_filename,"r");
		errorcode = 0;
		if(!inputfile)
		{
			printf("[%i]: Error opening file %s.\n",my_rank,fcst_filename);
			errorcode = 1;
		}
	}

	//Check if the forecast file was opened
	MPI_Bcast(&errorcode,1,MPI_INT,0,MPI_COMM_WORLD);
	if(errorcode)	return NULL;

	//Reserve space
	Forecaster = (ForecastData*) malloc(sizeof(ForecastData));
	Forecaster->model_name = (char*) malloc(string_size*sizeof(char));

	//Read table name
	//if(my_rank == 0)
	{
		ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
		valsread = sscanf(linebuffer,"%s",Forecaster->model_name);
		if(ReadLineError(valsread,1,"forecaster model name"))	return NULL;
		//length = strlen(Forecaster->model_name);
	}
	//MPI_Bcast(&length,1,MPI_UNSIGNED,0,MPI_COMM_WORLD);
	//MPI_Bcast(Forecaster->model_name,length+1,MPI_CHAR,0,MPI_COMM_WORLD);

	//Read if data is displayed on ifis
	//if(my_rank == 0)
	{
		ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
		valsread = sscanf(linebuffer,"%hi",&(Forecaster->ifis_display));
		if(ReadLineError(valsread,1,"flag if displaying on ifis"))	return NULL;
	}
	//MPI_Bcast(&(Forecaster->ifis_display),1,MPI_SHORT,0,MPI_COMM_WORLD);

	//Read which forcing index is used for forecasting
	//if(my_rank == 0)
	{
		ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
		valsread = sscanf(linebuffer,"%u",&(Forecaster->forecasting_forcing));
		if(ReadLineError(valsread,1,"index of forecastin forcing"))	return NULL;
	}
	//MPI_Bcast(&(Forecaster->forecasting_forcing),1,MPI_UNSIGNED,0,MPI_COMM_WORLD);

	//Read number of rainfall steps to use per forecast
	//if(my_rank == 0)
	{
		ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
		valsread = sscanf(linebuffer,"%u",&(Forecaster->num_rainsteps));
		if(ReadLineError(valsread,1,"number of precipitation values"))	return NULL;
	}
	//MPI_Bcast(&(Forecaster->num_rainsteps),1,MPI_UNSIGNED,0,MPI_COMM_WORLD);

	//Read forecast window
	ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
	valsread = sscanf(linebuffer,"%lf",&(Forecaster->forecast_window));
	if(ReadLineError(valsread,1,"forecast window"))	return NULL;

	//Read and create a database connection for the rain maps
	Forecaster->rainmaps_filename = NULL;
	Forecaster->rainmaps_db = NULL;
	//if(my_rank == 0)
	{
		Forecaster->rainmaps_filename = (char*) malloc(string_size*sizeof(char));
		ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
		valsread = sscanf(linebuffer,"%s",Forecaster->rainmaps_filename);
		if(ReadLineError(valsread,1,"rain map filename"))	return NULL;

		Forecaster->rainmaps_db = ReadDBC(Forecaster->rainmaps_filename,string_size);
		if(!Forecaster->rainmaps_db)	return NULL;
	}

	//Read halt filename
	Forecaster->halt_filename = (char*) malloc(string_size*sizeof(char));
	//if(my_rank == 0)
	{
		ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
		valsread = sscanf(linebuffer,"%s",Forecaster->halt_filename);
		if(ReadLineError(valsread,1,"halt filename"))	return NULL;
		//length = strlen(Forecaster->halt_filename);
	}
	//MPI_Bcast(&length,1,MPI_UNSIGNED,0,MPI_COMM_WORLD);
	//MPI_Bcast(Forecaster->halt_filename,length+1,MPI_CHAR,0,MPI_COMM_WORLD);

	//Read ending mark
	//if(my_rank == 0)
	{
		ReadLineFromTextFile(inputfile,linebuffer,buff_size,string_size);
		valsread = sscanf(linebuffer,"%c",&end_char);
		if(ReadLineError(valsread,1,"ending mark"))	return NULL;
	}
	//MPI_Bcast(&end_char,1,MPI_CHAR,0,MPI_COMM_WORLD);

	//Clean up
	free(linebuffer);
	if(my_rank == 0)	fclose(inputfile);

	MPI_Barrier(MPI_COMM_WORLD);
	if(end_char != '#')
	{
		if(my_rank == 0)
			printf("[%i]: Error: Ending mark not seen in %s.\n",my_rank,fcst_filename);
		return NULL;
	}
	return Forecaster;
}
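As written, only rank 0 opens the forecast file, while the commented-out MPI_Bcast calls hint at the intended pattern: rank 0 parses each field and broadcasts it to the other ranks. A minimal hedged sketch of that pattern for one string field (the helper name is illustrative, not part of the original code):

#include <mpi.h>
#include <string.h>

/* Sketch only: rank `root` owns the parsed string; everyone else receives a copy.
 * Assumes buf has the same capacity on every rank. */
static void bcast_string(char *buf, int root, MPI_Comm comm)
{
	int rank, length = 0;
	MPI_Comm_rank(comm, &rank);
	if (rank == root)	length = (int) strlen(buf);
	MPI_Bcast(&length, 1, MPI_INT, root, comm);
	MPI_Bcast(buf, length + 1, MPI_CHAR, root, comm);	//include the '\0'
}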
Example #16
	void system::set_geometry(const bool init) 
	{
		const double dt_max = 1.0/512;
		scheduler = Scheduler(dt_max);

		int np;
		float lx, ly, lz;
		FILE *fin = NULL;
		if (myproc == 0)
		{
			float wp;
			fin = fopen(fin_data, "r");
			int ival;
			size_t nread;

			nread = fread(&ival, sizeof(int), 1, fin);		assert(ival == 2*sizeof(int));
			nread = fread(&np, sizeof(int), 1, fin);
			nread = fread(&wp, sizeof(float), 1, fin);
			nread = fread(&ival, sizeof(int), 1, fin);		assert(ival == 2*sizeof(int));
			
			nread = fread(&ival, sizeof(int), 1, fin);		assert(ival == 3*sizeof(float));
			nread = fread(&lx, sizeof(float), 1, fin);
			nread = fread(&ly, sizeof(float), 1, fin);
			nread = fread(&lz, sizeof(float), 1, fin);
			nread = fread(&ival, sizeof(int), 1, fin);		assert(ival == 3*sizeof(float));

			fprintf(stderr, " np= %d  wp= %g \n",np, wp);
			fprintf(stderr, " lx= %g  ly= %g  lz= %g \n", lx, ly, lz);
		}

		MPI_Bcast(&lx,  1, MPI_FLOAT, 0, MPI_COMM_WORLD);
		MPI_Bcast(&ly,  1, MPI_FLOAT, 0, MPI_COMM_WORLD);
		MPI_Bcast(&lz,  1, MPI_FLOAT, 0, MPI_COMM_WORLD);

		t_end   = 0.2;

		n_restart = 2;
		dt_restart = dt_max;

		dt_dump = 0.01;

		di_log = 100;

		global_n = local_n = 0;

//		eulerian = true;

		const vec3 rmin(0.0);
		const vec3 rmax(lx, ly, lz);
		global_domain = boundary(rmin, rmax);
		global_domain_size = global_domain.hsize() * 2.0;

		const vec3 Len3 = global_domain.hsize() * 2.0;
		pfloat<0>::set_scale(Len3.x);
		pfloat<1>::set_scale(Len3.y);
		pfloat<2>::set_scale(Len3.z);

		if (myproc == 0) 
		{

			ptcl.resize(np);

			const int nx = (int)std::pow(np, 1.0/3.0);
			const dvec3 dr = dvec3(Len3.x/nx, Len3.y/nx, Len3.z/nx);
			const real rmax = dr.abs() * 1.0;

			fprintf(stderr, "dr= %g %g %g \n", dr.x, dr.y, dr.z);

			local_n  = ptcl.size();
			global_n = local_n;

			{
				std::vector<float> x(local_n), y(local_n), z(local_n);
				size_t nread;
				int ival;

				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));
				nread = fread(&x[0], sizeof(float), local_n, fin);
				assert((int)nread == local_n);
				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));
				
				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));
				nread = fread(&y[0], sizeof(float), local_n, fin);
				assert((int)nread == local_n);
				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));
				
				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));
				nread = fread(&z[0], sizeof(float), local_n, fin);
				assert((int)nread == local_n);
				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));

				for (int i = 0; i < local_n; i++)
				{
					const dvec3 vel(0.0, 0.0, 0.0);
					ptcl[i] = Particle(x[i], y[i], z[i], vel.x, vel.y, vel.z, i);
					ptcl[i].rmax = rmax;
					ptcl[i].unset_derefine();
				}
			}

			U.resize(local_n);
			const int var_list[7] = {
				Fluid::VELX,
				Fluid::VELY,
				Fluid::VELZ,
				Fluid::DENS,
				Fluid::BX,
				Fluid::BY,
				Fluid::BZ};

			std::vector<float> data(local_n);
			for (int var = 0; var < 7; var++)
			{
				fprintf(stderr, " reading vat %d out of %d \n", var+1, 7);
				int ival;
				size_t nread;
				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));
				nread = fread(&data[0], sizeof(float), local_n, fin);
				assert((int)nread == local_n);
				nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float));
				for (int i = 0; i < local_n; i++)
					U[i][var_list[var]] = data[i];
			}
			for (int i = 0; i < local_n; i++)
			{
				assert(U[i][Fluid::DENS] > 0.0);
				U[i][Fluid::ETHM] = cs2 * U[i][Fluid::DENS];
			}


			fclose(fin);

			fprintf(stderr, "  *** proc= %d : local_n= %d  global_n= %d \n", myproc, local_n, global_n);
		} // myproc == 0

		MPI_Bcast(&global_n,  1, MPI_INT, 0, MPI_COMM_WORLD);

		fprintf(stderr, " proc= %d  distrubite \n", myproc);
		MPI_Barrier(MPI_COMM_WORLD);

		Distribute::int3 nt(1, 1, 1);
		switch(nproc) {
			case 1: break;
			case 2: nt.x = 2; nt.y = 1; nt.z = 1; break;
			case 4: nt.x = 2; nt.y = 2; nt.z = 1; break;
			case 6: nt.x = 3; nt.y = 2; nt.z = 1; break;
			case 8: nt.x = 2; nt.y = 2; nt.z = 2; break;
			case 16: nt.x = 4; nt.y = 2; nt.z = 2; break;
			case 32: nt.x = 4; nt.y = 4; nt.z = 2; break;
			case 64: nt.x = 4; nt.y = 4; nt.z = 4; break;
			case 128: nt.x = 8; nt.y = 4; nt.z = 4; break;
			case 256: nt.x = 8; nt.y = 8; nt.z = 4; break;
			case 512: nt.x = 8; nt.y = 8; nt.z = 8; break;
			default: assert(false);
		}

		const Distribute::int3 nt_glb(nt);
		const pBoundary pglobal_domain(pfloat3(0.0), pfloat3(Len3));
		distribute_glb.set(nproc, nt, pglobal_domain);

		for (int k = 0; k < 5; k++)
			distribute_data(true, false);

		const int nloc_reserve = (int)(2.0*global_n/nproc);
		fit_reserve_vec(ptcl,      nloc_reserve);
		fit_reserve_vec(ptcl_ppos, nloc_reserve);
		fit_reserve_vec(U,         nloc_reserve);
		fit_reserve_vec(dU,        nloc_reserve);
		fit_reserve_vec(Wgrad,     nloc_reserve);
		fit_reserve_vec(gradPsi,   nloc_reserve);
		fit_reserve_vec(cells,     nloc_reserve);

		MPI_Barrier(MPI_COMM_WORLD);

		fprintf(stderr, " *** proc= %d : local_n= %d  global_n= %d \n", myproc, local_n, global_n);
		fprintf(stderr, " proc= %d  building_mesh \n", myproc);

		MPI_Barrier(MPI_COMM_WORLD);



		const double t10 = mytimer::get_wtime();
		clear_mesh();
		int nattempt = build_mesh(true);
		double dt10 = mytimer::get_wtime() - t10;

		double volume_loc = 0.0;
		{
			std::vector<TREAL> v(local_n);
			for (int i = 0; i < local_n; i++)
				v[i] = cells[i].Volume;
			std::sort(v.begin(), v.end());  // sort volumes from low to high, to avoid roundoff errors
			for (int i = 0; i < local_n; i++)
				volume_loc += v[i];
		}


		double dt10max;
		MPI_Allreduce(&dt10, &dt10max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
		double volume_glob = 0.0;	
		int    nattempt_max, nattempt_min;
		MPI_Allreduce(&volume_loc, &volume_glob,  1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		MPI_Allreduce(&nattempt,   &nattempt_max, 1, MPI_INT,    MPI_MAX, MPI_COMM_WORLD);
		MPI_Allreduce(&nattempt,   &nattempt_min, 1, MPI_INT,    MPI_MIN, MPI_COMM_WORLD);

		const double volume_exact = global_domain_size.x*global_domain_size.y*global_domain_size.z;
		if (myproc == 0)
		{
			fprintf(stderr, "first call build_mesh:[ %g  sec ::  %g cells/s/proc/thread ]\n",
					dt10max,
					global_n/nproc/dt10max);
			fprintf(stderr, "   computed_volume= %g  exact_volume= %g diff= %g [ %g ]  nattempt= %d %d \n",
					volume_glob, volume_exact, 
					volume_glob - volume_exact,	(volume_glob - volume_exact)/volume_exact,
					nattempt_min, nattempt_max);
		}

		exchange_ptcl();

	}
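The paired asserts around each fread above check the 4-byte record markers that Fortran unformatted files place before and after every record; both markers must equal the payload size in bytes. A hedged helper sketch of the same idea (not part of the original code):

#include <assert.h>
#include <stdio.h>

/* Sketch: read one Fortran unformatted record of `nbytes` payload bytes. */
static void read_fortran_record(FILE *f, void *dst, int nbytes)
{
	int marker = 0;
	size_t nread;
	nread = fread(&marker, sizeof(int), 1, f);	assert(nread == 1 && marker == nbytes);
	nread = fread(dst, 1, (size_t) nbytes, f);	assert((int) nread == nbytes);
	nread = fread(&marker, sizeof(int), 1, f);	assert(nread == 1 && marker == nbytes);
}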
Example #17
/*@C
   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a
         working directory or have different ones.

   Collective on MPI_Comm

   Input Parameters:
.  comm - MPI_Communicator that may share working directory

   Output Parameters:
.  shared - PETSC_TRUE or PETSC_FALSE

   Options Database Keys:
+    -shared_working_directory 
.    -not_shared_working_directory

   Environmental Variables:
+     PETSC_SHARED_WORKING_DIRECTORY
.     PETSC_NOT_SHARED_WORKING_DIRECTORY

   Level: developer

   Notes:
   Stores the status as a MPI attribute so it does not have
    to be redetermined each time.

      Assumes that all processors in a communicator either
       1) have a common working directory or
       2) each has a separate working directory
      eventually we can write a fancier one that determines which processors
      share a common working directory.

   This will be very slow on runs with a large number of processors since
   it requires O(p*p) file opens.

@*/
PetscErrorCode PETSC_DLLEXPORT PetscSharedWorkingDirectory(MPI_Comm comm,PetscTruth *shared)
{
  PetscErrorCode     ierr;
  PetscMPIInt        size,rank,*tagvalp,sum,cnt,i;
  PetscTruth         flg,iflg;
  FILE               *fd;
  static PetscMPIInt Petsc_WD_keyval = MPI_KEYVAL_INVALID;
  int                err;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    *shared = PETSC_TRUE;
    PetscFunctionReturn(0);
  }

  ierr = PetscOptionsGetenv(comm,"PETSC_SHARED_WORKING_DIRECTORY",PETSC_NULL,0,&flg);CHKERRQ(ierr);
  if (flg) {
    *shared = PETSC_TRUE;
    PetscFunctionReturn(0);
  }

  ierr = PetscOptionsGetenv(comm,"PETSC_NOT_SHARED_WORKING_DIRECTORY",PETSC_NULL,0,&flg);CHKERRQ(ierr);
  if (flg) {
    *shared = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  if (Petsc_WD_keyval == MPI_KEYVAL_INVALID) {
    ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelTmpShared,&Petsc_WD_keyval,0);CHKERRQ(ierr);
  }

  ierr = MPI_Attr_get(comm,Petsc_WD_keyval,(void**)&tagvalp,(int*)&iflg);CHKERRQ(ierr);
  if (!iflg) {
    char       filename[PETSC_MAX_PATH_LEN];

    /* This communicator does not yet have a shared  attribute */
    ierr = PetscMalloc(sizeof(PetscMPIInt),&tagvalp);CHKERRQ(ierr);
    ierr = MPI_Attr_put(comm,Petsc_WD_keyval,tagvalp);CHKERRQ(ierr);

    ierr = PetscGetWorkingDirectory(filename,240);CHKERRQ(ierr);
    ierr = PetscStrcat(filename,"/petsctestshared");CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    
    /* each processor creates a  file and all the later ones check */
    /* this makes sure no subset of processors is shared */
    *shared = PETSC_FALSE;
    for (i=0; i<size-1; i++) {
      if (rank == i) {
        fd = fopen(filename,"w");
        if (!fd) SETERRQ1(PETSC_ERR_FILE_OPEN,"Unable to open test file %s",filename);
        err = fclose(fd);
        if (err) SETERRQ(PETSC_ERR_SYS,"fclose() failed on file");    
      }
      ierr = MPI_Barrier(comm);CHKERRQ(ierr);
      if (rank >= i) {
        fd = fopen(filename,"r");
        if (fd) cnt = 1; else cnt = 0;
        if (fd) {
          err = fclose(fd);
          if (err) SETERRQ(PETSC_ERR_SYS,"fclose() failed on file");    
        }
      } else {
        cnt = 0;
      }
      ierr = MPI_Allreduce(&cnt,&sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
      if (rank == i) {
        unlink(filename);
      }

      if (sum == size) {
        *shared = PETSC_TRUE;
        break;
      } else if (sum != 1) {
        SETERRQ(PETSC_ERR_SUP_SYS,"Subset of processes share working directory");
      }
    }
    *tagvalp = (int)*shared;
  } else {
    *shared = (PetscTruth) *tagvalp;
  }
  ierr = PetscInfo1(0,"processors %s working directory\n",(*shared) ? "shared" : "do NOT share");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
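A minimal hedged usage sketch, based only on the signature and documentation above (error handling abbreviated):

  PetscTruth     shared;
  PetscErrorCode ierr;

  ierr = PetscSharedWorkingDirectory(PETSC_COMM_WORLD,&shared);CHKERRQ(ierr);
  if (shared) {
    /* all ranks see the same working directory, e.g. a parallel file system */
  }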
Example #18
static int reallocate_buffers(int numNewVertices, int numNewPins)
{
  int status = 0;
  ZOLTAN_ID_TYPE *idbuf=NULL;
  int *ibuf=NULL;
  float *fbuf=NULL;

  if (verbose) MPI_Barrier(MPI_COMM_WORLD);

  if (numNewVertices > numMyVertices){   /* avoid realloc bug */
    idbuf = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * numNewVertices);
    if (!idbuf) return 1;
    memcpy(idbuf, vtxGID, sizeof(ZOLTAN_ID_TYPE) * numMyVertices);
    free(vtxGID);
    vtxGID = idbuf;
    if (verbose){
      printf("(%d) vtxGID allocated for %d vertices\n",myRank,numNewVertices);
    }

    ibuf = (int *)malloc(sizeof(int) * (numNewVertices+1));
    if (!ibuf) return 1;
    memcpy(ibuf, nborIndex, sizeof(int) * (1 +numMyVertices));
    free(nborIndex);
    nborIndex = ibuf;
    if (verbose){
      printf("(%d) nborIndex allocated for %d indices into nbor array\n",myRank,numNewVertices+1);
    }
  }

  if (numNewPins > numMyPins){
    idbuf = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * numNewPins);
    if (!idbuf) return 1;
    memcpy(idbuf, nborGID, sizeof(ZOLTAN_ID_TYPE) * numMyPins);
    free(nborGID);
    nborGID = idbuf;
    if (verbose){
      printf("(%d) nborGID allocated for %d neighbor IDs\n",myRank,numNewPins);
    }

    ibuf = (int *)malloc(sizeof(int) * numNewPins);
    if (!ibuf) return 1;
    memcpy(ibuf, nborProc, sizeof(int) * numMyPins);
    free(nborProc);
    nborProc = ibuf;
    if (verbose){
      printf("(%d) nborProc allocated for %d process IDs\n",myRank,numNewPins);
    }

    fbuf = (float *)malloc(sizeof(float) * numNewPins);
    if (!fbuf) return 1;
    memcpy(fbuf, edgeWgt, sizeof(float) * numMyPins);
    free(edgeWgt);
    edgeWgt = fbuf;
    if (verbose){
      printf("(%d) edgeWgt allocated for %d edge weights\n",myRank,numNewPins);
    }
  }

  if (verbose) {
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
  }

  return status;
}
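The malloc/memcpy/free sequences above grow each buffer with an explicit copy, sidestepping the realloc problem the first comment alludes to. The same grow-by-copy idiom in isolation, as a hedged sketch (helper name is illustrative):

#include <stdlib.h>
#include <string.h>

/* Sketch: grow an int buffer from old_count to new_count elements.
 * On allocation failure the old buffer is left untouched and 1 is returned. */
static int grow_int_buffer(int **buf, int old_count, int new_count)
{
  int *tmp = (int *)malloc(sizeof(int) * new_count);
  if (!tmp) return 1;
  memcpy(tmp, *buf, sizeof(int) * old_count);
  free(*buf);
  *buf = tmp;
  return 0;
}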
int main (int argc, char ** argv) 
{
    int         rank, size, i, j, k, token;
    MPI_Comm    comm = MPI_COMM_WORLD;
    MPI_Status  status;
    enum ADIOS_READ_METHOD method = ADIOS_READ_METHOD_BP;
    ADIOS_SELECTION * sel;
    void * data = NULL;
    uint64_t start[3], count[3], step = 0;

    MPI_Init (&argc, &argv);
    MPI_Comm_rank (comm, &rank);
    MPI_Comm_size (comm, &size);

    adios_read_init_method (method, comm, "verbose=3");
    adios_logger_open ("log_read_as_file_C", rank);

    /* adios_read_open_file() allows for seeing all timesteps in the file */
    ADIOS_FILE * f = adios_read_open_file ("global_array_time_C.bp", method, comm);
    if (f == NULL)
    {
        log_error ("%s\n", adios_errmsg());
        return -1;
    }

    ADIOS_VARINFO * v = adios_inq_var (f, "temperature");

    // read in two timesteps
    data = malloc (2 * v->dims[0] * v->dims[1] * sizeof (double));
    if (data == NULL)
    {
        log_error ("malloc failed.\n");
        return -1;
    }

    // read in timestep 'rank' (up to 12)
    step = rank % 13;

    start[0] = 0;
    count[0] = v->dims[0];

    start[1] = 0;
    count[1] = v->dims[1];

    /* Read a subset of the temperature array */
    sel = adios_selection_boundingbox (v->ndim, start, count);
    /*    2 steps from 'step' */
    adios_schedule_read (f, sel, "temperature", step, 2, data);
    adios_perform_reads (f, 1);

    if (rank == 0) 
        log_test ("Array size of temperature [0:%lld,0:%lld]\n", v->dims[0], v->dims[1]);   

    if (rank > 0) {
        MPI_Recv (&token, 1, MPI_INT, rank-1, 0, comm, &status);
    }

    log_test("------------------------------------------------\n");
    log_test("rank=%d: \n", rank);
    for (i = 0; i < 2; i++) {
        log_test ("step %lld = [\n", step+i);   
        for (j = 0; j < v->dims[0]; j++) {
            log_test (" [");
            for (k = 0; k < v->dims[1]; k++) {
                log_test ("%g ", ((double *)data) [ i * v->dims[0] * v->dims[1] + j * v->dims[1] + k]);
            }
            log_test ("]\n");
        }
        log_test ("]\n");
    }
    log_test ("\n");

    if (rank < size-1) {
        MPI_Send (&token, 1, MPI_INT, rank+1, 0, comm);
    }

    free (data);
    adios_free_varinfo (v);

    adios_read_close (f);
    MPI_Barrier (comm);
    adios_read_finalize_method (method);
    adios_logger_close();
    MPI_Finalize ();
    return 0;
}
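The MPI_Recv from rank-1 and MPI_Send to rank+1 wrapped around the printing above serialize the output so lines from different ranks do not interleave. The same token-passing idiom in isolation, as a hedged sketch:

#include <mpi.h>
#include <stdio.h>

/* Sketch: ranks print strictly in rank order by passing a token down the line. */
static void print_in_rank_order(MPI_Comm comm)
{
    int rank, size, token = 0;
    MPI_Status status;
    MPI_Comm_rank (comm, &rank);
    MPI_Comm_size (comm, &size);
    if (rank > 0)
        MPI_Recv (&token, 1, MPI_INT, rank-1, 0, comm, &status);
    printf ("output from rank %d\n", rank);
    fflush (stdout);
    if (rank < size-1)
        MPI_Send (&token, 1, MPI_INT, rank+1, 0, comm);
}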
Example #20
int main(int argc, char *argv[])
{
  int rc, do_hier, status;
  float ver;
  struct Zoltan_Struct *zz;
  int changes, numGidEntries, numLidEntries, numImport, numExport;
  int generate_files = 0;
  char *platform=NULL, *topology=NULL;
  char *graph_package=NULL;
  ZOLTAN_ID_PTR importGlobalGids, importLocalGids, exportGlobalGids, exportLocalGids;
  int *importProcs, *importToPart, *exportProcs, *exportToPart;
  struct option opts[10];
  double comm_time[10];
  float cut_weight[3] = {0., 0., 0.};
  long nvert=0;
  char *debug_level=NULL;

  status = 0;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
  MPI_Comm_size(MPI_COMM_WORLD, &numProcs);

  Zoltan_Initialize(argc, argv, &ver);
  zz = Zoltan_Create(MPI_COMM_WORLD);

  /******************************************************************
  ** Check that this test makes sense.
  ******************************************************************/

  if (sizeof(long) < sizeof(ZOLTAN_ID_TYPE)){
    if (myRank == 0){
      printf("ERROR: This code assumes that a long is at least %d bytes\n",(int)sizeof(ZOLTAN_ID_TYPE));
    }
    status = 1;
  }

  check_error_status(status, "configuration error");

  /******************************************************************
  ** Initialize zoltan
  ******************************************************************/

  /* options */

  opts[0].name = "platform";
  opts[0].has_arg = 1;
  opts[0].flag = NULL;
  opts[0].val = 1;

  opts[1].name = "topology";
  opts[1].has_arg = 1;
  opts[1].flag = NULL;
  opts[1].val = 2;

  opts[2].name = "size";
  opts[2].has_arg = 1;
  opts[2].flag = NULL;
  opts[2].val = 4;

  opts[3].name = "verbose";
  opts[3].has_arg = 0;
  opts[3].flag = NULL;
  opts[3].val = 5;

  opts[4].name = "help";
  opts[4].has_arg = 0;
  opts[4].flag = NULL;
  opts[4].val = 6;

  opts[5].name = "graph_package";
  opts[5].has_arg = 1;
  opts[5].flag = NULL;
  opts[5].val = 7;

  opts[6].name = "generate_files";
  opts[6].has_arg = 0;
  opts[6].flag = NULL;
  opts[6].val = 8;

  opts[7].name = "debug_level";
  opts[7].has_arg = 1;
  opts[7].flag = NULL;
  opts[7].val = 9;

  opts[8].name = 0;
  opts[8].has_arg = 0;
  opts[8].flag = NULL;
  opts[8].val = 0;

  status = 0;

  while (1){
    rc = getopt_long_only(argc, argv, "",  opts, NULL);

    if (rc == '?'){
      MPI_Barrier(MPI_COMM_WORLD);
      if (myRank == 0) usage();
      MPI_Finalize();
      exit(0);
    }
    else if (rc == 1){
      platform = optarg;
      if (myRank == 0)
        printf( "For platform %s\n",optarg );
    }
    else if (rc == 2){
      topology = optarg;
      if (myRank == 0)
        printf( "For topology %s\n",optarg);
    }
    else if (rc == 7){
      graph_package = optarg;
      if (myRank == 0)
        printf( "Zoltan parameter GRAPH_PACKAGE = %s\n",graph_package);
    }
    else if (rc == 8){
      generate_files = 1;
      if (myRank == 0)
        printf( "Zoltan_Generate_Files will be called for each level.\n");
    }
    else if (rc == 4){
      nvert = atol(optarg);
      if (nvert < 1) status = 1;
      check_error_status(status, "--size={approximate number of vertices}");
      if (myRank == 0){
        printf( "Graph will have approximately %ld vertices.\n",nvert);
      }
    }
    else if (rc == 5){
      verbose = 1;
    }
    else if (rc == 6){
      if (myRank == 0) usage();
      MPI_Finalize();
      exit(0);
    }
    else if (rc == 9){
      debug_level = optarg;
    }
    else if (rc <= 0){
      break;
    }
  }

  if ((platform==NULL) && (topology==NULL)){
    if (myRank == 0)
      fprintf(stdout,"No platform or topology, so we'll skip hierarchical partitioning\n");
    do_hier = 0;
  }
  else if (graph_package == NULL){
    if (myRank == 0)
      fprintf(stdout,"No graph package, so we'll skip hierarchical partitioning\n");
    do_hier = 0;
  }
  else{
    do_hier = 1;
  }

  /* start */

  Zoltan_Memory_Debug(0);

  if (nvert > 0)
    numGlobalVertices = nvert;
  else
    numGlobalVertices = NUM_GLOBAL_VERTICES;

  status = create_a_graph();
  check_error_status(status, "creating the graph");

  Zoltan_Set_Param(zz, "DEBUG_LEVEL", "0");
  Zoltan_Set_Param(zz, "REMAP", "0");
  Zoltan_Set_Param(zz, "NUM_GID_ENTRIES", "1");
  Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "1");
  Zoltan_Set_Param(zz, "RETURN_LISTS", "ALL"); /* export AND import lists */
  Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "1"); /* number of weights per vertex */
  Zoltan_Set_Param(zz, "EDGE_WEIGHT_DIM", "1");/* number of weights per hyperedge */

  Zoltan_Set_Num_Obj_Fn(zz, get_number_of_vertices, NULL);
  Zoltan_Set_Obj_List_Fn(zz, get_vertex_list, NULL);
  Zoltan_Set_Num_Edges_Multi_Fn(zz, get_num_edges_list,  NULL);
  Zoltan_Set_Edge_List_Multi_Fn(zz, get_edge_list,  NULL);

  /* GRAPH PARTITION */

  Zoltan_Set_Param(zz, "LB_METHOD", "GRAPH");
  Zoltan_Set_Param(zz, "LB_APPROACH", "PARTITION");

  if (graph_package)
    Zoltan_Set_Param(zz, "GRAPH_PACKAGE", graph_package);

  if (verbose){
    debug(zz, "Initial graph", 0);
  }

  if (generate_files){
    rc = Zoltan_Generate_Files(zz, "flat", myRank, 0, 1, 0);
    if (rc != ZOLTAN_OK) status = 1;
    check_error_status(status, "Zoltan_Generate_Files");
  }

  /* Performance before partitioning */
  time_communication(comm_time+0);
  cut_weight[0] = get_edge_cut_weight(zz);

  if (cut_weight[0] < 0.0) status = 1;
  check_error_status(status, "First call to get_edge_cut_weight");

  rc = Zoltan_LB_Partition(zz, /* input (all remaining fields are output) */
        &changes,        /* 1 if partitioning was changed, 0 otherwise */
        &numGidEntries,  /* Number of integers used for a global ID */
        &numLidEntries,  /* Number of integers used for a local ID */
        &numImport,      /* Number of vertices to be sent to me */
        &importGlobalGids,  /* Global IDs of vertices to be sent to me */
        &importLocalGids,   /* Local IDs of vertices to be sent to me */
        &importProcs,    /* Process rank for source of each incoming vertex */
        &importToPart,   /* New partition for each incoming vertex */
        &numExport,      /* Number of vertices I must send to other processes*/
        &exportGlobalGids,  /* Global IDs of the vertices I must send */
        &exportLocalGids,   /* Local IDs of the vertices I must send */
        &exportProcs,    /* Process to which I send each of the vertices */
        &exportToPart);  /* Partition to which each vertex will belong */

  if (rc != ZOLTAN_OK) status = 1;
  check_error_status(status, "First call to LB_Partition");

  status = migrate_graph(numExport, numImport, exportLocalGids, importGlobalGids);
  check_error_status(status, "migration");

  if (verbose){
    debug(zz, "After flat partitioning and migration", 0);
  }

  time_communication(comm_time+1);      /* With graph partitioning */
  cut_weight[1] = get_edge_cut_weight(zz);

  if (cut_weight[1] < 0.0) status = 1;
  check_error_status(status, "Second call to get_edge_cut_weight");

  Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids,
                      &importProcs, &importToPart);
  Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids,
                      &exportProcs, &exportToPart);

  if (do_hier){

    /* HIERARCHICAL PARTITION */

    free_graph();
    status = create_a_graph();
    check_error_status(status, "create graph for hierarchical partitioning");

    Zoltan_Set_Param(zz, "LB_METHOD", "HIER");
    Zoltan_Set_Param(zz, "HIER_ASSIST", "1");
    if (generate_files){
      Zoltan_Set_Param(zz, "HIER_GENERATE_FILES", "1");
    }

    if (debug_level)   /* 1, 2 or 3 */
      Zoltan_Set_Param(zz, "HIER_DEBUG_LEVEL", debug_level);
    else
      Zoltan_Set_Param(zz, "HIER_DEBUG_LEVEL", "0");

    /* TODO: Suppose graph is not symmetric, and we request SYMMETRIZE.  Do we still get
     *  a "good" answer when each sub-graph in the hierarchy is symmetrized?
     */

    if (topology)
      Zoltan_Set_Param(zz, "TOPOLOGY", topology);
    else if (platform)
      Zoltan_Set_Param(zz, "PLATFORM", platform);

    rc = Zoltan_LB_Partition(zz, /* input (all remaining fields are output) */
          &changes,        /* 1 if partitioning was changed, 0 otherwise */
          &numGidEntries,  /* Number of integers used for a global ID */
          &numLidEntries,  /* Number of integers used for a local ID */
          &numImport,      /* Number of vertices to be sent to me */
          &importGlobalGids,  /* Global IDs of vertices to be sent to me */
          &importLocalGids,   /* Local IDs of vertices to be sent to me */
          &importProcs,    /* Process rank for source of each incoming vertex */
          &importToPart,   /* New partition for each incoming vertex */
          &numExport,      /* Number of vertices I must send to other processes*/
          &exportGlobalGids,  /* Global IDs of the vertices I must send */
          &exportLocalGids,   /* Local IDs of the vertices I must send */
          &exportProcs,    /* Process to which I send each of the vertices */
          &exportToPart);  /* Partition to which each vertex will belong */

    if (rc != ZOLTAN_OK) status = 1;
    check_error_status(status, "Second call to LB_Partition");

    status = migrate_graph(numExport, numImport, exportLocalGids, importGlobalGids);
    check_error_status(status, "second migration");

    if (verbose){
      debug(zz, "After hierarchical partitioning and migration", 0);
    }

    time_communication(comm_time+2);      /* With hierarchical graph partitioning */
    cut_weight[2] = get_edge_cut_weight(zz);

    if (cut_weight[2] < 0.0) status = 1;
    check_error_status(status, "Third call to get_edge_cut_weight");

    Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids,
                        &importProcs, &importToPart);
    Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids,
                        &exportProcs, &exportToPart);
  }

  Zoltan_Destroy(&zz);

  free_graph();

  if (myRank == 0){
    fprintf(stdout,"Graph cut weight before partitioning: %f\n",cut_weight[0]);
    fprintf(stdout,"             after flat partitioning: %f\n",cut_weight[1]);
    if (do_hier)
      fprintf(stdout,"     after hierarchical partitioning: %f\n",cut_weight[2]);
    fflush(stdout);
  }

  if (cut_weight[1] >= cut_weight[0]){
    status = 1;
    if (zz->Proc == 0){
      fprintf(stderr,"FAILED: No improvement shown in flat partitioning");
    }
  }

  if (do_hier && (cut_weight[2] > cut_weight[0])){
    status = 1;
    if (zz->Proc == 0){
      fprintf(stderr,"FAILED: No improvement shown in hierarchical partitioning");
    }
  }


  MPI_Finalize();

  return status;
}
Example #21
/// Recreate the shared nodes. An alternate incorrect version can be enabled by undefining CORRECT_COORD_COMPARISON
void ParFUM_recreateSharedNodes(int meshid, int dim, MPI_Comm newComm) {

#define CORRECT_COORD_COMPARISON
  MPI_Comm comm = newComm;
  int rank, nParts;
  int send_count=0; // sanity check
  int recv_count=0; // sanity check
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &nParts);


#if SUPER_FAST_SPECIFIC_TORUS

#define TORUSY 15
#define TORUSZ 15

  CkPrintf("rank %d is manually configuring the IDXL lists to make the shared node generation fast\n");
  
  FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes");
  IDXL_Side &shared = mesh->node.shared;
  
  int low = (rank-1+nParts) % nParts;
  int high = (rank+1) % nParts;
  
  IDXL_List &list1 = shared.addList(low);
  IDXL_List &list2 = shared.addList(high);
  
  int nodesInPlane = TORUSY * TORUSZ;
  int numNodes = FEM_Mesh_get_length(meshid,FEM_NODE);
  
  // vp - 1
  for(int j=0;j<nodesInPlane;j++){
    list1.push_back(j);
  }
  
  // vp + 1
  for(int j=0;j<nodesInPlane;j++){
    list2.push_back(numNodes - nodesInPlane +j);
  }
  
  return;
#else




  // Shared data will be temporarily stored in the following structure
  int *sharedNodeCounts; // sharedCounts[i] = number of nodes shared with rank i
  int **sharedNodeLists; // sharedNodes[i] is the list of nodes shared with rank i
  // Initialize shared data
  sharedNodeCounts = (int *)malloc(nParts*sizeof(int));
  sharedNodeLists = (int **)malloc(nParts*sizeof(int *));
  for (int i=0; i<nParts; i++) {
    sharedNodeLists[i] = NULL;
    sharedNodeCounts[i] = 0;
  }
  // Get local node count and coordinates
  int numNodes;
  int coord_msg_tag=42, sharedlist_msg_tag=43;
  double *nodeCoords;
  numNodes = FEM_Mesh_get_length(meshid,FEM_NODE);
  nodeCoords = (double *)malloc(dim*numNodes*sizeof(double));

  FEM_Mesh_become_get(meshid);

  FEM_Mesh_data(meshid,FEM_NODE,FEM_COORD, nodeCoords, 0, numNodes,FEM_DOUBLE, dim);
  
  //MPI_Barrier(MPI_COMM_WORLD);
  if (rank==0) CkPrintf("Extracted node data...\n");

  // Begin exchange of node coordinates to determine shared nodes
  // FIX ME: compute bounding box, only exchange when bounding boxes collide

  /// The highest partition # to which I send my coordinates (wraps around)
  int sendUpperBound;
  if(nParts %2==0){
    sendUpperBound = rank + (nParts/2)  - (rank%2);
  } else {
    sendUpperBound = rank + (nParts/2) ;
  }

  /// The lowest partition # from which I receive coordinates (wraps around)
  int sendLowerBound;
  if(nParts %2==0){
    sendLowerBound = rank - (nParts/2) + ((rank+1)%2);
  } else {
    sendLowerBound = rank - (nParts/2);
  }
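  /* Worked example for the odd-nParts branch: with nParts = 5, rank 2 gets
   * sendUpperBound = 4 and sendLowerBound = 0, so it sends its coordinates to
   * ranks 3 and 4 and posts two receives (for the messages from ranks 0 and 1).
   * Every unordered pair of partitions is thus exchanged exactly once. */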
  
  // Special case optimization for when the mesh is generated in such a way that only neighboring partitions share nodes
  // look for command line argument
#ifdef SHARED_NODES_ONLY_NEIGHBOR
  //#warning "ParFUM_recreateSharedNodes only allows adjacent partitions(rank +/- 1) to have shared nodes"
  sendUpperBound = rank + 1;
  sendLowerBound = rank - 1;
#endif

  for (int i=rank+1; i<=sendUpperBound; i++) { //send nodeCoords to rank i
    MPI_Send(nodeCoords, dim*numNodes, MPI_DOUBLE, i%nParts, coord_msg_tag, comm);
    send_count ++;
    //    printf("[%d] Sending %d doubles  to rank %d \n",rank,dim*numNodes,i%nParts);
  }


  // Receive coordinates from the appropriate number of other partitions
  // These can be received in any order
  for (int i=sendLowerBound; i<rank; i++) {
    std::vector<int> remoteSharedNodes, localSharedNodes;
    double *recvNodeCoords;
    MPI_Status status;
    int source, length;
    // Probe for a coordinate message from any source; extract source and msg length
    MPI_Probe(MPI_ANY_SOURCE, coord_msg_tag, comm, &status);
    source = status.MPI_SOURCE;
    length = status.MPI_LENGTH/sizeof(double);
    // printf("[%d] Receiving %d doubles from rank %d \n",rank,length,source);
    recv_count ++;
    // Receive whatever data was available according to probe
    recvNodeCoords = (double *)malloc(length*sizeof(double));
    MPI_Recv((void*)recvNodeCoords, length, MPI_DOUBLE, source, 
	      coord_msg_tag, comm, &status);
    // Match coords between local nodes and received coords
    int recvNodeCount = length/dim;


    // PERFORM THE NODE COMPARISONS

#ifdef SHARED_NODES_ONLY_NEIGHBOR

    int borderNodes = BORDERNODES;

    //#warning "Only the first and last BORDERNODES nodes on each partition are candidates for being shared nodes"

    // indices are inclusive
    int myBottomLow = 0;
    int myBottomHigh = borderNodes;
    int myTopLow = numNodes - borderNodes;
    int myTopHigh = numNodes-1;

    int recvBottomLow = 0;
    int recvBottomHigh = borderNodes;
    int recvTopLow = recvNodeCount - borderNodes;
    int recvTopHigh = recvNodeCount-1;

    CkPrintf("[%d] rank=%d myBottomLow=%d myBottomHigh=%d myTopLow=%d myTopHigh=%d   recvBottomLow=%d recvBottomHigh=%d recvTopLow=%d recvTopHigh=%d\n", CkMyPe(), rank, myBottomLow, myBottomHigh, myTopLow, myTopHigh, recvBottomLow, recvBottomHigh, recvTopLow, recvTopHigh);    

    // make sure the top region is non-negative
    if(myTopLow < 0)
      myTopLow = 0;
      
    if(recvTopLow < 0)
      recvTopLow = 0;

    // make the two regions be non-overlapping
    if(myBottomHigh >= myTopLow)
      myTopLow = myTopLow-1;
    
    if(recvBottomHigh >= recvTopLow)
      recvTopLow = recvTopLow-1;
    
    for (int j=myBottomLow; j<=myBottomHigh; j++) {
      for (int k=recvBottomLow; k<=recvBottomHigh; k++) {
	if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
	  localSharedNodes.push_back(j); 
	  remoteSharedNodes.push_back(k);
	  break;
	}
      }
    }

    for (int j=myTopLow; j<=myBottomHigh; j++) {
      for (int k=recvTopLow; k<=recvTopHigh; k++) {
	if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
	  localSharedNodes.push_back(j); 
	  remoteSharedNodes.push_back(k);
	  break;
	}
      }
    }


    for (int j=myTopLow; j<=myTopHigh; j++) {
      for (int k=recvBottomLow; k<=recvBottomHigh; k++) {
	if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
	  localSharedNodes.push_back(j); 
	  remoteSharedNodes.push_back(k);
	  break;
	}
      }
    }

    for (int j=myBottomLow; j<=myTopHigh; j++) {
      for (int k=recvTopLow; k<=recvTopHigh; k++) {
	if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
	  localSharedNodes.push_back(j); 
	  remoteSharedNodes.push_back(k);
	  break;
	}
      }
    }

#else 


    //    CkPrintf("Comparing %d nodes with %d received nodes\n", numNodes, recvNodeCount);
    for (int j=0; j<numNodes; j++) {
      for (int k=0; k<recvNodeCount; k++) {
	if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
	  localSharedNodes.push_back(j); 
	  remoteSharedNodes.push_back(k);
	  //printf("[%d] found local node %d to match with remote node %d \n",rank,j,k);
	  break;
	}
      }
    }

#endif

    // Copy local nodes that were shared with source into the data structure
    int *localSharedNodeList = (int *)malloc(localSharedNodes.size()*sizeof(int));
    for (int m=0; m<localSharedNodes.size(); m++) {
      localSharedNodeList[m] = localSharedNodes[m];
    }
    sharedNodeCounts[source] = localSharedNodes.size();
    sharedNodeLists[source] = localSharedNodeList;
    // do not delete localSharedNodeList as a pointer to it is stored
    // Send remote nodes that were shared with this partition to remote partition
    MPI_Send((int *)&remoteSharedNodes[0], remoteSharedNodes.size(), MPI_INT, source, 
	     sharedlist_msg_tag, comm);
    free(recvNodeCoords);
  }


  for (int i=rank+1; i<=sendUpperBound; i++) {  // recv shared node lists (from the partitions in any order)
    int *sharedNodes;
    MPI_Status status;
    int source, length;
    // Probe for a shared node list from any source; extract source and msg length
    MPI_Probe(MPI_ANY_SOURCE, sharedlist_msg_tag, comm, &status);
    source = status.MPI_SOURCE;
    length = status.MPI_LENGTH/sizeof(int);
    // Recv the shared node list the probe revealed was available
    sharedNodes = (int *)malloc(length*sizeof(int));
    MPI_Recv((void*)sharedNodes, length, MPI_INT, source, sharedlist_msg_tag, comm, &status);
    // Store the shared node list in the data structure
    sharedNodeCounts[source] = length;
    sharedNodeLists[source] = sharedNodes;
    // don't delete sharedNodes! we kept a pointer to it!
  }

  if (rank==0) CkPrintf("Received new shared node lists...\n");

  // IMPLEMENT ME: use sharedNodeLists and sharedNodeCounts to move shared node data 
  // to IDXL
  FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes");
  IDXL_Side &shared = mesh->node.shared;
  
  for(int i=0;i<nParts;i++){
    if(i == rank)
      continue;
    if(sharedNodeCounts[i] != 0){
      IDXL_List &list = shared.addList(i);
      for(int j=0;j<sharedNodeCounts[i];j++){
	list.push_back(sharedNodeLists[i][j]);
      }
    }
  }
  
  
  MPI_Barrier(MPI_COMM_WORLD);

  if (rank==0) CkPrintf("Recreation of shared nodes complete...\n");

  //printf("After recreating shared nodes %d \n",rank);
  //shared.print();
#ifdef SHARED_NODES_ONLY_NEIGHBOR
  CkAssert(send_count + recv_count == 2);
#else
  CkAssert(send_count + recv_count == nParts-1);
#endif

  // Clean up
  free(nodeCoords);
  free(sharedNodeCounts);
  for (int i=0; i<nParts; i++) {
    if (sharedNodeLists[i])
      free(sharedNodeLists[i]);
  }
  free(sharedNodeLists);

#endif // normal mode, not super fast mesh specific one
}
Example #22
int main(int argc, char* argv[])
{
  LIS_MATRIX		A,A0;
  LIS_VECTOR		b,x,v;
  LIS_SCALAR		ntimes,nmflops,nnrm2;
  LIS_SCALAR		*value;

  int			nprocs,my_rank;
  int			nthreads, maxthreads;
  int			gn,nnz,mode;
  int			i,j,jj,j0,j1,l,k,n,np,h,ih;
  int			m,nn,ii;
  int			block;
  int			rn,rmin,rmax,rb;
  int			is,ie,clsize,ci,*iw;
  int			err,iter,storage;
  int	       		*ptr,*index;
  double		mem,val,ra,rs,ri,ria,ca,time,time2,convtime,val2,nnzs,nnzap,nnzt;
  double		commtime,comptime,flops;
  FILE			*file;
  char path[1024];

  LIS_DEBUG_FUNC_IN;
    
  lis_initialize(&argc, &argv);

#ifdef USE_MPI
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);
#else
  nprocs  = 1;
  my_rank = 0;
#endif

  if( argc < 4 )
    {
      if( my_rank==0 ) printf("Usage: spmvtest4 matrix_filename matrix_type iter [block] \n");
      lis_finalize();
      exit(0);
    }

  file = fopen(argv[1], "r");
  if( file==NULL ) CHKERR(1);

  storage  = atoi(argv[2]);
  iter = atoi(argv[3]);
  if (argv[4] == NULL) {
    block = 2;
  }
  else {
    block = atoi(argv[4]);
  }

  if( storage<1 || storage>11 )
    {
      if( my_rank==0 ) printf("storage=%d <1 or storage=%d >11\n",storage,storage);
      CHKERR(1);
    }
  if( iter<=0 )
    {
      if( my_rank==0 ) printf("iter=%d <= 0\n",iter);
      CHKERR(1);
    }

  if( my_rank==0 )
    {
      printf("\n");
      printf("number of processes = %d\n",nprocs);
    }

#ifdef _OPENMP
  if( my_rank==0 )
    {
      nthreads = omp_get_num_procs();
      maxthreads = omp_get_max_threads();
      printf("max number of threads = %d\n", nthreads);
      printf("number of threads = %d\n", maxthreads);
    }
#else
      nthreads = 1;
      maxthreads = 1;
#endif

  /* create matrix and vectors */
  lis_matrix_create(LIS_COMM_WORLD,&A0);
  err = lis_input(A0,NULL,NULL,argv[1]);
  CHKERR(err);

  n   = A0->n;
  gn  = A0->gn;
  nnz = A0->nnz;
  np  = A0->np-n;
#ifdef USE_MPI
  MPI_Allreduce(&nnz,&i,1,MPI_INT,MPI_SUM,A0->comm);
  nnzap = (double)i / (double)nprocs;
  nnzt  = ((double)nnz -nnzap)*((double)nnz -nnzap);
  nnz   = i;
  MPI_Allreduce(&nnzt,&nnzs,1,MPI_DOUBLE,MPI_SUM,A0->comm);
  nnzs  = (nnzs / (double)nprocs)/nnzap;
  MPI_Allreduce(&np,&i,1,MPI_INT,MPI_SUM,A0->comm);
  np = i;
#endif

  err = lis_vector_duplicate(A0,&x);
  if( err ) CHKERR(err);
  err = lis_vector_duplicate(A0,&b);
  if( err ) CHKERR(err);

  lis_matrix_get_range(A0,&is,&ie);
  for(i=0;i<n;i++)
    {
      err = lis_vector_set_value(LIS_INS_VALUE,i+is,1.0,x);
    }

  lis_matrix_duplicate(A0,&A);
  lis_matrix_set_type(A,storage);
  err = lis_matrix_convert(A0,A);
  if( err ) CHKERR(err);
		    
  comptime = 0.0;
  commtime = 0.0;

  for(i=0;i<iter;i++)
    {
#ifdef USE_MPI
      MPI_Barrier(A->comm);
      time = lis_wtime();
      lis_send_recv(A->commtable,x->value);
      commtime += lis_wtime() - time;
#endif
      time2 = lis_wtime();
      lis_matvec(A,x,b);
      comptime += lis_wtime() - time2;
    }
  lis_vector_nrm2(b,&val);

  if( my_rank==0 )
    {
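      /* One sparse matrix-vector product does a multiply and an add per stored
       * nonzero, i.e. 2*nnz flops per iteration; dividing by 1.0e6 and by the
       * measured computation time gives MFLOPS. */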
      flops = 2.0*nnz*iter*1.0e-6 / comptime;
      if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC )
	{
	  A->bnr = block;
	  A->bnc = block;
	  printf("format = %s(%dx%d) (%2d), iteration = %d, computation = %e sec., %8.3f MFLOPS, communication = %e sec., communication/computation = %3.3f %%, 2-norm = %e\n",lis_storagename2[storage-1],block,block,storage,iter,comptime,flops,commtime,commtime/comptime*100,val);
	}
      else
	{
	  printf("format = %s (%2d), iteration = %d, computation = %e sec., %8.3f MFLOPS, communication = %e sec., communication/computation = %3.3f %%, 2-norm = %e\n",lis_storagename2[storage-1],storage,iter,comptime,flops,commtime,commtime/comptime*100,val);
	}
    }

  lis_matrix_destroy(A);
  lis_matrix_destroy(A0);
  lis_vector_destroy(b);
  lis_vector_destroy(x);

  lis_finalize();

  LIS_DEBUG_FUNC_OUT;

  return 0;
}
Example #23
File: Pmodeling.c  Project: yunzhishi/src
void forward_modeling(sf_file Fdat, sf_mpi *mpipar, sf_sou soupar, sf_acqui acpar, sf_vec_s array, bool verb)
/*< acoustic forward modeling >*/
{
	int ix, iz, is, ir, it;
	int sx, rx, sz, rz, frectx, frectz;
	int nz, nx, padnz, padnx, padnzx, nt, nr, nb;

	float dx2, dz2, dt2, dt;
	float **vv, **dd;
	float **p0, **p1, **p2, **term, **tmparray, *rr;

	FILE *swap;

	MPI_Comm comm=MPI_COMM_WORLD;

	swap=fopen("temswap.bin", "wb+");

	padnz=acpar->padnz;
	padnx=acpar->padnx;
	padnzx=padnz*padnx;
	nz=acpar->nz;
	nx=acpar->nx;
	nt=acpar->nt;
	nr=acpar->nr;
	nb=acpar->nb;
	sz=acpar->sz;
	rz=acpar->rz;
	frectx=soupar->frectx;
	frectz=soupar->frectz;

	dx2=acpar->dx*acpar->dx;
	dz2=acpar->dz*acpar->dz;
	dt2=acpar->dt*acpar->dt;
	dt=acpar->dt;

	vv = sf_floatalloc2(padnz, padnx);
	dd=sf_floatalloc2(nt, nr);

	p0=sf_floatalloc2(padnz, padnx);
	p1=sf_floatalloc2(padnz, padnx);
	p2=sf_floatalloc2(padnz, padnx);
	term=sf_floatalloc2(padnz, padnx);
	rr=sf_floatalloc(padnzx);

	/* padding and convert vector to 2-d array */
	pad2d(array->vv, vv, nz, nx, nb);

	for(is=mpipar->cpuid; is<acpar->ns; is+=mpipar->numprocs){
		sf_warning("###### is=%d ######", is+1);

		memset(dd[0], 0., nr*nt*sizeof(float));
		memset(p0[0], 0., padnzx*sizeof(float));
		memset(p1[0], 0., padnzx*sizeof(float));
		memset(p2[0], 0., padnzx*sizeof(float));
		
		sx=acpar->s0_v+is*acpar->ds_v;
		source_map(sx, sz, frectx, frectz, padnx, padnz, padnzx, rr);

		for(it=0; it<nt; it++){
			if(verb) sf_warning("Modeling is=%d; it=%d;", is+1, it);

			/* output data */
			for(ir=0; ir<acpar->nr2[is]; ir++){
				rx=acpar->r0_v[is]+ir*acpar->dr_v;
				dd[acpar->r02[is]+ir][it]=p1[rx][rz];
			}

			/* laplacian operator */
			laplace(p1, term, padnx, padnz, dx2, dz2);
			
			/* load source */
			for(ix=0; ix<padnx; ix++){
				for(iz=0; iz<padnz; iz++){
					term[ix][iz] += rr[ix*padnz+iz]*array->ww[it];
				}
			}

			/* update */
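			/* Standard second-order explicit time step of the acoustic wave equation:
			 *   p^{n+1} = 2*p^n - p^{n-1} + v^2*dt^2*(laplacian(p^n) + source),
			 * where `term` already holds the Laplacian plus the injected source. */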
			for(ix=0; ix<padnx; ix++){
				for(iz=0; iz<padnz; iz++){
					p2[ix][iz]=2*p1[ix][iz]-p0[ix][iz]+vv[ix][iz]*vv[ix][iz]*dt2*term[ix][iz];
				}
			}
			
			/* swap wavefield pointer of different time steps */
			tmparray=p0; p0=p1; p1=p2; p2=tmparray;

			/* boundary condition */
			apply_sponge(p0, acpar->bc, padnx, padnz, nb);
			apply_sponge(p1, acpar->bc, padnx, padnz, nb);
		} // end of time loop

		fseeko(swap, is*nr*nt*sizeof(float), SEEK_SET);
		fwrite(dd[0], sizeof(float), nr*nt, swap);
	}// end of shot loop
	fclose(swap);
	MPI_Barrier(comm);

	/* transfer data to Fdat */
	if(mpipar->cpuid==0){
		swap=fopen("temswap.bin", "rb");
		for(is=0; is<acpar->ns; is++){
			fseeko(swap, is*nr*nt*sizeof(float), SEEK_SET);
			fread(dd[0], sizeof(float), nr*nt, swap);
			sf_floatwrite(dd[0], nr*nt, Fdat);
		}
		fclose(swap);
		remove("temswap.bin");
	}
	MPI_Barrier(comm);
	
	/* release allocated memory */
	free(*p0); free(p0); free(*p1); free(p1);
	free(*p2); free(p2); free(*vv); free(vv);
	free(*dd); free(dd);
	free(rr); free(*term); free(term);
}
Example #24
int main(int argc, char** argv)
{
     
     //Number of CPUs
     int numProcs;
     //Processor ID
     int rank;
     
     //The status of our receiver
     MPI_Status status;
     
     //Init MPI; this starts the parallel execution.
     MPI_Init(&argc, &argv);     
     
     //Finds out how many CPUs are in our network
     MPI_Comm_size(MPI_COMM_WORLD, &numProcs);

     //Determines the rank of a process
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     //Height and width of image will be passed in.
     int height = atoi(argv[1]);
     int width = atoi(argv[2]);
     
     Complex num;
     struct timeval start;
     double time = 0.0;
     
     //The Mandelbrot Set lies in this plane.
     //X range
     float realMax = 2.0;
     float realMin = -2.0;
     
     //Y range
     float imagMax = 2.0;
     float imagMin = -2.0;
     
     //Scale the image so that it can be seen at the given resolution.
     float scaleX = (realMax - realMin) / width;
     float scaleY = (imagMax - imagMin) / height;
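     //Each pixel (x, row) therefore maps to the complex number
     //  c = (realMin + x*scaleX) + i*(imagMin + row*scaleY),
     //so the rendered image covers [realMin, realMax] x [imagMin, imagMax].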
     
     //Number of slaves
     int numGroups = numProcs - 1;
     
     //Number of rows remaining after evenly partitioning among the slaves.
     int remainder = height % numGroups;
     
     //How tall each partition is.
     int grpHeight = (height - remainder) / numGroups;
     
     //The area of our partition
     int partArea = grpHeight * width;
     
     //Image array
     unsigned int* image 
          = (unsigned int *) malloc(sizeof(unsigned int) * height * width);

     unsigned int* buffer 
          = (unsigned int *) malloc(sizeof(unsigned int) * (width + 10));
     
     int DATA_TAG = 0;
     int TERMINATE = 1;
     
     MPI_Barrier(MPI_COMM_WORLD);

     if (rank == 0)
     {
          int count = 0;
          int row = 0;
                    
          //Starting the clock
          gettimeofday(&start, NULL); 
          
          for (int proc = 1; proc < numProcs; proc++)
          {
               MPI_Send(&row, 1, MPI_INT, proc, DATA_TAG, MPI_COMM_WORLD);    
               count++;
               row++;
          }     
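
          //Dynamically farm out the remaining rows: each completed row that
          //comes back frees a slave, which is immediately handed the next row
          //(or a TERMINATE tag once every row has been assigned).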
          
          do 
          {
               MPI_Recv(buffer, width, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
               
              count--;
               
               if (row < height)
               {
                    MPI_Send(&row, 1, MPI_INT, status.MPI_SOURCE, DATA_TAG, MPI_COMM_WORLD);    
                    count++;
                    row++;
               }
               else
               {
                    MPI_Send(&row, 1, MPI_INT, status.MPI_SOURCE, TERMINATE, MPI_COMM_WORLD);
                    
               }
               
               for (int x = 0; x < width; x++)
               {
                    image[status.MPI_TAG * width + x] = buffer[x];
               }
               
                    
          } while (count > 0);
           
          //Stop the clock
          time = getElapsed(&start);

          //Output result
          printf("%d cores %dx%d: %fs\n", numProcs, height, width, time);
     
          //Calculate I/O time
          //gettimeofday(&start, NULL);
     
          //Display the set
          //writeImage("Static.ppm", image, height, width); 
     
          //Stop the clock
         // time = getElapsed(&start);
     
          //Output result
          //printf("Runtime for file I/O: %fs\n", time);
                   
     }
     else 
     {
          int row;
                
          MPI_Recv(&row, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD, &status);
          //printf("Slave: %d Receive Init", rank);

          while (status.MPI_TAG != TERMINATE)
          {
              num.imag = imagMin + ((float) row * scaleY);

              for (int x = 0; x < width; x++)
              {
                    //Initialize Complex based on position.
                    num.real = realMin + ((float) x * scaleX);
                                
                    //Calculates the color of the current pixel.
                    buffer[x] = calPixel(num);
               }
               
               MPI_Send(buffer, width, MPI_UNSIGNED, 0, row, MPI_COMM_WORLD);
               //printf("Slave: %d Send row %d\n", rank, row);

               //Send only partition worked on
               MPI_Recv(&row, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
               //printf("Slave: %d Recv row %d\n", rank, row);
         }
         
     }
     free(buffer);
     free(image);
     MPI_Finalize();
     return 0;
}
Example #25
int main(int argc, char *argv[])
{
    int num_errors = 0, total_num_errors = 0;
    int rank, size;
    char port1[MPI_MAX_PORT_NAME];
    char port2[MPI_MAX_PORT_NAME];
    MPI_Status status;
    MPI_Comm comm1, comm2;
    int verbose = 0;
    int data = 0;

    MTEST_VG_MEM_INIT(port1, MPI_MAX_PORT_NAME * sizeof(char));
    MTEST_VG_MEM_INIT(port2, MPI_MAX_PORT_NAME * sizeof(char));

    if (getenv("MPITEST_VERBOSE")) {
        verbose = 1;
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (size < 3) {
        printf("Three processes needed to run this test.\n");
        MPI_Finalize();
        return 0;
    }

    if (rank == 0) {
        IF_VERBOSE(("0: opening ports.\n"));
        MPI_Open_port(MPI_INFO_NULL, port1);
        MPI_Open_port(MPI_INFO_NULL, port2);

        IF_VERBOSE(("0: opened port1: <%s>\n", port1));
        IF_VERBOSE(("0: opened port2: <%s>\n", port2));
        IF_VERBOSE(("0: sending ports.\n"));
        MPI_Send(port1, MPI_MAX_PORT_NAME, MPI_CHAR, 1, 0, MPI_COMM_WORLD);
        MPI_Send(port2, MPI_MAX_PORT_NAME, MPI_CHAR, 2, 0, MPI_COMM_WORLD);

        IF_VERBOSE(("0: accepting port2.\n"));
        MPI_Comm_accept(port2, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm2);
        IF_VERBOSE(("0: accepting port1.\n"));
        MPI_Comm_accept(port1, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm1);

        IF_VERBOSE(("0: closing ports.\n"));
        MPI_Close_port(port1);
        MPI_Close_port(port2);

        IF_VERBOSE(("0: sending 1 to process 1.\n"));
        data = 1;
        MPI_Send(&data, 1, MPI_INT, 0, 0, comm1);

        IF_VERBOSE(("0: sending 2 to process 2.\n"));
        data = 2;
        MPI_Send(&data, 1, MPI_INT, 0, 0, comm2);

        IF_VERBOSE(("0: disconnecting.\n"));
        MPI_Comm_disconnect(&comm1);
        MPI_Comm_disconnect(&comm2);
    }
    else if (rank == 1) {
        IF_VERBOSE(("1: receiving port.\n"));
        MPI_Recv(port1, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &status);

        IF_VERBOSE(("1: received port1: <%s>\n", port1));
        IF_VERBOSE(("1: connecting.\n"));
        MPI_Comm_connect(port1, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm1);

        MPI_Recv(&data, 1, MPI_INT, 0, 0, comm1, &status);
        if (data != 1) {
            printf("Received %d from root when expecting 1\n", data);
            fflush(stdout);
            num_errors++;
        }

        IF_VERBOSE(("1: disconnecting.\n"));
        MPI_Comm_disconnect(&comm1);
    }
    else if (rank == 2) {
        IF_VERBOSE(("2: receiving port.\n"));
        MPI_Recv(port2, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &status);

        IF_VERBOSE(("2: received port2: <%s>\n", port2));
        /* make sure process 1 has time to do the connect before this process
         * attempts to connect */
        MTestSleep(3);
        IF_VERBOSE(("2: connecting.\n"));
        MPI_Comm_connect(port2, MPI_INFO_NULL, 0, MPI_COMM_SELF, &comm2);

        MPI_Recv(&data, 1, MPI_INT, 0, 0, comm2, &status);
        if (data != 2) {
            printf("Received %d from root when expecting 2\n", data);
            fflush(stdout);
            num_errors++;
        }

        IF_VERBOSE(("2: disconnecting.\n"));
        MPI_Comm_disconnect(&comm2);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Reduce(&num_errors, &total_num_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        if (total_num_errors) {
            printf(" Found %d errors\n", total_num_errors);
        }
        else {
            printf(" No Errors\n");
        }
        fflush(stdout);
    }
    MPI_Finalize();
    return total_num_errors;
}
Example #26
File: main.c  Project: RWTH-OS/Hydro
int
main(int argc, char **argv) {
  char myhost[256];
  real_t dt = 0;
  int nvtk = 0;
  char outnum[80];
  int time_output = 0;
  long flops = 0;

  // real_t output_time = 0.0;
  real_t next_output_time = 0;
  double start_time = 0, end_time = 0;
  double start_iter = 0, end_iter = 0;
  double elaps = 0;
  struct timespec start, end;
  double cellPerCycle = 0;
  double avgCellPerCycle = 0;
  long nbCycle = 0;

  // array of timers to profile the code
  memset(functim, 0, TIM_END * sizeof(functim[0]));

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  process_args(argc, argv, &H);
  hydro_init(&H, &Hv);

  if (H.mype == 0)
    fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single"));
  gethostname(myhost, 255);
  if (H.mype == 0) {
    fprintf(stdout, "Hydro: Main process running on %s\n", myhost);
  }

#ifdef _OPENMP
  if (H.mype == 0) {
    fprintf(stdout, "Hydro:    OpenMP mode ON\n");
    fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads());
    fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads());
    fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs());
  }
#endif
#ifdef MPI
  if (H.mype == 0) {
    fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc);
  }
#else
  fprintf(stdout, "Hydro: standard build\n");
#endif


  // PRINTUOLD(H, &Hv);
#ifdef MPI
  if (H.nproc > 1)
#if FTI>0
    MPI_Barrier(FTI_COMM_WORLD);
#endif
#if FTI==0
    MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  if (H.dtoutput > 0) {
    // outputs are in physical time not in time steps
    time_output = 1;
    next_output_time = next_output_time + H.dtoutput;
  }

  if (H.dtoutput > 0 || H.noutput > 0)
    vtkfile(++nvtk, H, &Hv);

  if (H.mype == 0)
    fprintf(stdout, "Hydro starts main loop.\n");

  //pre-allocate memory before entering the loop
  //For godunov scheme
  start = cclock();
  start = cclock();
  allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
  compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat);
  end = cclock();
#ifdef MPI
#if FTI==1
  FTI_Protect(0,functim, TIM_END,FTI_DBLE);
  FTI_Protect(1,&nvtk,1,FTI_INTG);
  FTI_Protect(2,&next_output_time,1,FTI_DBLE);
  FTI_Protect(3,&dt,1,FTI_DBLE);
  FTI_Protect(4,&MflopsSUM,1,FTI_DBLE);
  FTI_Protect(5,&nbFLOPS,1,FTI_LONG);
  FTI_Protect(6,&(H.nstep),1,FTI_INTG);
  FTI_Protect(7,&(H.t),1,FTI_DBLE);
  FTI_Protect(8,Hv.uold,H.nvar * H.nxt * H.nyt,FTI_DBLE);
#endif
#endif
  if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end));
  // we start timings here to avoid the cost of initial memory allocation
  start_time = dcclock();

  while ((H.t < H.tend) && (H.nstep < H.nstepmax)) {
    //system("top -b -n1");
    // reset perf counter for this iteration
    flopsAri = flopsSqr = flopsMin = flopsTra = 0;
    start_iter = dcclock();
    outnum[0] = 0;
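    // the time step dt is recomputed only on even iterations; odd iterations reuse the previous value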
    if ((H.nstep % 2) == 0) {
      dt = 0;
      // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n");
      start = cclock();
      compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat);
      end = cclock();
      functim[TIM_COMPDT] += ccelaps(start, end);
      if (H.nstep == 0) {
        dt = dt / 2.0;
        if (H.mype == 0) fprintf(stdout, "Hydro computes initial deltat: %le\n", dt);
      }
#ifdef MPI
      if (H.nproc > 1) {
        real_t dtmin;
        // printf("pe=%4d\tdt=%lg\n",H.mype, dt);
#if FTI==0
        if (sizeof(real_t) == sizeof(double)) {
          MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
        } else {
          MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
        }
#endif
#if FTI>0
        if (sizeof(real_t) == sizeof(double)) {
          MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD);
        } else {
          MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, FTI_COMM_WORLD);
        }
#endif
        dt = dtmin;
      }
#endif
    }
    // dt = 1.e-3;
    // if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n");
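    // alternate the first argument to hydro_godunov (1 vs. 2) between even and odd steps, presumably swapping the sweep order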
    if ((H.nstep % 2) == 0) {
      hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
      //            hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw);
    } else {
      hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
      //            hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw);
    }
    end_iter = dcclock();
    cellPerCycle = (double) (H.globnx * H.globny) / (end_iter - start_iter) / 1000000.0L;
    avgCellPerCycle += cellPerCycle;
    nbCycle++;

    H.nstep++;
    H.t += dt;
    {
      real_t iter_time = (real_t) (end_iter - start_iter);
#ifdef MPI
      long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t;
      start = cclock();
#if FTI==0
      MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
#endif
#if FTI>0
      MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
      MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
      MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
      MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
#endif
      //       if (H.mype == 1)
      //        printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t);
      flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA;
      end = cclock();
      functim[TIM_ALLRED] += ccelaps(start, end);
#else
      flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA;
#endif
      nbFLOPS++;

      if (flops > 0) {
        if (iter_time > 1.e-9) {
          double mflops = (double) flops / (double) 1.e+6 / iter_time;
          MflopsSUM += mflops;
          sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time);
        }
      } else {
        sprintf(outnum, "%s (%.3fs)", outnum, iter_time);
      }
    }
    if (time_output == 0 && H.noutput > 0) {
      if ((H.nstep % H.noutput) == 0) {
        vtkfile(++nvtk, H, &Hv);
        sprintf(outnum, "%s [%04d]", outnum, nvtk);
      }
    } else {
      if (time_output == 1 && H.t >= next_output_time) {
        vtkfile(++nvtk, H, &Hv);
        next_output_time = next_output_time + H.dtoutput;
        sprintf(outnum, "%s [%04d]", outnum, nvtk);
      }
    }
    if (H.mype == 0) {
	    fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %.3lf MC/s%s\n", H.nstep, H.t, dt, cellPerCycle, outnum);
      fflush(stdout);
    }
#ifdef MPI
#if FTI==1
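    /* FTI_Snapshot writes a checkpoint when the checkpoint interval configured for FTI has elapsed (and restores state after a restart) */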
    FTI_Snapshot();     
#endif
#endif
  } // while
  end_time = dcclock();

  // Deallocate work spaces
  deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
  compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat);

  hydro_finish(H, &Hv);
  elaps = (double) (end_time - start_time);
  timeToString(outnum, elaps);
  if (H.mype == 0) {
    fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS));
    fprintf(stdout, "       ");
  }
  if (H.nproc == 1) {
    int sizeFmt = sizeLabel(functim, TIM_END);
    printTimingsLabel(TIM_END, sizeFmt);
    fprintf(stdout, "\n");
    if (sizeof(real_t) == sizeof(double)) {
      fprintf(stdout, "PE0_DP ");
    } else {
      fprintf(stdout, "PE0_SP ");
    }
    printTimings(functim, TIM_END, sizeFmt);
    fprintf(stdout, "\n");
    fprintf(stdout, "%%      ");
    percentTimings(functim, TIM_END);
    printTimings(functim, TIM_END, sizeFmt);
    fprintf(stdout, "\n");
  }
#ifdef MPI
  if (H.nproc > 1) {
    double timMAX[TIM_END];
    double timMIN[TIM_END];
    double timSUM[TIM_END];
#if FTI==0
    MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
    MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#endif
#if FTI>0
    MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD);
    MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD);
    MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, FTI_COMM_WORLD);
#endif
    if (H.mype == 0) {
      int sizeFmt = sizeLabel(timMAX, TIM_END);
      printTimingsLabel(TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "MIN ");
      printTimings(timMIN, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "MAX ");
      printTimings(timMAX, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "AVG ");
      avgTimings(timSUM, TIM_END, H.nproc);
      printTimings(timSUM, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
    }
  }
#endif
  if (H.mype == 0) {
	  fprintf(stdout, "Average MC/s: %.3lf\n", (double)(avgCellPerCycle / nbCycle));
  }

#ifdef MPI
#if FTI>0
  FTI_Finalize();
#endif
  MPI_Finalize();
#endif
  return 0;
}
Example #27
int main (int argc, char** argv) 
{
  fastbit_init(0);
  fastbit_set_verbose_level(0);

  ADIOS_FILE * f;
  //MPI_Comm    comm_dummy = 0;  // MPI_Comm is defined through adios_read.h 
  MPI_Comm comm_dummy = MPI_COMM_WORLD;

  int         rank, size;
  MPI_Init (&argc, &argv);			   
  MPI_Comm_rank (comm_dummy, &rank);
  MPI_Comm_size (comm_dummy, &size);

  adios_init_noxml (comm_dummy);
  
  if (argc < 2) {
    printf("Usage: index_fastbit fileName (attrName)");
    return 0;
  }

  f = adios_read_open_file (argv[1], ADIOS_READ_METHOD_BP, comm_dummy);
  if (f == NULL) {
    printf ("::%s\n", adios_errmsg());
    return -1;
  }
  
  /*
  adios_allocate_buffer (ADIOS_BUFFER_ALLOC_NOW, (f->file_size)*2/1048576 + 5); // +5MB for extra room in buffer
  adios_declare_group (&gAdios_group, gGroupNameFastbitIdx, "", adios_flag_yes);
  adios_select_method (gAdios_group, "MPI", "", "");
  */
  gIdxFileName = fastbit_adios_util_getFastbitIndexFileName(argv[1]);
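  // remove any index file left over from a previous run before writing a new one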
  unlink(gIdxFileName);

      adios_allocate_buffer (ADIOS_BUFFER_ALLOC_NOW, 500); // fixed 500 MB ADIOS buffer for the index output
      adios_declare_group (&gAdios_group, gGroupNameFastbitIdx, "", adios_flag_yes);
      adios_select_method (gAdios_group, "MPI", "", "");

      adios_open (&gAdios_write_file, gGroupNameFastbitIdx, gIdxFileName, "w", MPI_COMM_WORLD);

#ifdef MULTI_BLOCK
      int testid = adios_define_var (gAdios_group, "pack", "", adios_integer , 0, 0, 0);
#endif
#ifdef BOX
      int testid = adios_define_var (gAdios_group, "elements", "", adios_integer , 0, 0, 0);
#endif
      //uint64_t estimatedbytes = (nb+nk+no)*adios_type_size(adios_double, NULL);
      int jobCounter = getJobCounter(f);
      uint64_t estimatedbytes =  getByteEstimationOnFile(f, rank);
      if (size > 1) {
        int maxJobsPP = jobCounter/size + 1;
        estimatedbytes = estimatedbytes * maxJobsPP / jobCounter + 1048576;
      }

      estimatedbytes += 1048576;

      uint64_t adios_totalsize;      // adios_group_size needs to be called before any write_byid; otherwise write_byid does nothing
      adios_group_size (gAdios_write_file, estimatedbytes , &adios_totalsize);     

      printf("=> .. adios open output file: %s, rank %d allocated %" PRIu64 " bytes... \n", gIdxFileName, rank, adios_totalsize);
      // IMPORTANT:
      // open/close can only be called once per process;
      // otherwise the data gets tangled, or only the data from the last open/close call is recorded

#ifdef MULTI_BLOCK
      adios_write_byid(gAdios_write_file, testid, &pack);
#endif
#ifdef BOX
      adios_write_byid(gAdios_write_file, testid, &recommended_index_ele);
#endif


  sumLogTime(-1);
  sumLogTimeMillis(-1);

  
  if (argc >= 3) {
     int i=2;
     while (i<argc) {
        const char* varName = argv[i];
	if(strstr(varName, "<binning prec") != NULL) {
	  if (gBinningOption == NULL) {
	    gBinningOption = argv[i];
	  }
	  if (argc == 3) {
	    buildIndexOnAllVar(f, rank, size);
	    break;
	  }
	  i++;
	  continue;
	} else {
	  ADIOS_VARINFO * v = adios_inq_var(f, varName);
	  if (v == NULL) {
	     printf("No such variable: %s\n", varName);
	     return 0;
	   }	
	  printf("building fastbit index on  variable: %s\n", varName);
	  buildIndex_mpi(f, v, rank, size);
	  adios_free_varinfo(v);
	  i++;
	}
     }
  } else {
    buildIndexOnAllVar(f, rank, size);
  }


  sumLogTime(0);
  sumLogTimeMillis(0);

  adios_close(gAdios_write_file);
  adios_read_close(f);

  //
  // writing file clean up
  //


  // read back:
  f = adios_read_open_file (gIdxFileName, ADIOS_READ_METHOD_BP, comm_dummy);
  if (f == NULL) {
    printf("No such file: %s \n", gIdxFileName);
    return 0;
  }

  int numVars = f->nvars;
  
  int i=0;
  int k=0;
  int j=0;
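  // walk every variable in the freshly written index file and verify each of its data blocks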
  for (i=0; i<numVars; i++) {
      char* varName = f->var_namelist[i];
      ADIOS_VARINFO* v = adios_inq_var(f, varName);

      adios_inq_var_blockinfo(f, v);
      int timestep = 0;
      for (k=0; k<v->sum_nblocks; k++) {
	  verifyData(f, v, k, timestep);
      }

      adios_free_varinfo(v);
  }

  adios_read_close(f);

  if (rank == 0) {
    printf(" ==>  index file is at: %s\n", gIdxFileName);
  }

  // clean up
  MPI_Barrier (comm_dummy);
  adios_finalize (rank);
  MPI_Finalize ();
  free (gIdxFileName);

  fastbit_cleanup();
  return 0;
}
Example #28
File: main.c  Project: spabreu/Adaptive
void
PrintAllCompilationOptions()
{ 
#if defined MPI
  PRINT0("Perform communications every %d iterations (default %d)\n",
	 count_to_communication, CBLOCK_DEFAULT ) ;
#if (defined COMM_COST)||(defined ITER_COST)||(defined COMM_CONFIG)
  PRINT0("Prob communication = %d\n", proba_communication) ;
#endif
#endif /* MPI */

  PRINT0("Compilation options:\n") ;
  PRINT0("- Backtrack when reset: ") ;
#if defined BACKTRACK
  PRINT0("ON\n") ;
#else
  PRINT0("OFF\n") ;
#endif

#if defined MPI
  PRINT0("- MPI (So forced count to 1 (-b 1)!\n") ;
#if defined DEBUG_MPI_ENDING
  PRINT0("- DEBUG_MPI_ENDING\n") ;
#endif
#if defined LOG_FILE
  PRINT0("- LOG_FILE\n") ;
#endif
#if defined NO_SCREEN_OUTPUT
  PRINT0("- NO_SCREEN_OUTPUT\n") ;
#endif
#if defined DISPLAY_0
  PRINT0("- DISPLAY_0\n") ;
#endif
#if defined DISPLAY_ALL
  PRINT0("- DISPLAY_ALL\n") ;
#endif
#if defined DEBUG
  PRINT0("- DEBUG\n") ;
#endif
#if defined DEBUG_QUEUE
  PRINT0("- DEBUG_QUEUE\n") ;
#endif
#if defined DEBUG_PRINT_QUEUE
  PRINT0("- DEBUG_PRINT_QUEUE\n") ;
#endif
#if defined MPI_ABORT
  PRINT0("- MPI_ABORT\n") ;
#endif
#if defined MPI_BEGIN_BARRIER
  PRINT0("- MPI_BEGIN_BARRIER\n") ;
#endif
  /* Heuristic for communications */
#if defined COMM_COST
  PRINT0("- With COMM_COST\n") ;
#elif defined ITER_COST
  PRINT0("- With ITER_COST\n") ;
#elif defined COMM_CONFIG
  PRINT0("- With COMM_CONFIG\n") ;
#else
  PRINT0("- Without comm exept for terminaison\n") ;
#endif
#endif /* MPI */

#if defined MPI_BEGIN_BARRIER
  PRINT0("===========================================\n\n") ;
  PRINT0("MPI Barrier called to synchronize processus before solve()\n");
  MPI_Barrier(MPI_COMM_WORLD);
#endif /* MPI_BEGIN_BARRIER */

}
Example #29
int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size!=4) {
        if (rank==0) printf("Use 4 processes\n");
        MPI_Finalize();
        return size;
    }
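    /* Each block below sums the per-rank 4-element contributions and leaves element i of the total on rank i. */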

    {
        if (rank==0) printf("MPI_Reduce_scatter(sendbuf, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int sendbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvbuf[1] = {0};
        int recvcounts[4] = {1,1,1,1};
        MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
                rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int recvbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvcounts[4] = {1,1,1,1};
        MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
                rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce_scatter_block(sendbuf, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int sendbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvbuf[1] = {0};
        int recvcount = 1;
        MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
                rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int recvbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvcount = 1;
        MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
                rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce(sendbuf, tempbuf... + MPI_Scatter(tempbuf, recvcount...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int sendbuf[4] = {junk, junk*2, junk*3, junk*4};
        int tempbuf[4] = {0,0,0,0};
        int recvbuf[1] = {0};
        int recvcount = 1;
        MPI_Reduce(sendbuf, tempbuf, 4*recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD);
        MPI_Scatter(tempbuf, recvcount, MPI_INT, recvbuf, recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
                rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce(MPI_IN_PLACE, recvbuf... + MPI_Scatter(MPI_IN_PLACE, recvcount...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int recvbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvcount = 1;
        MPI_Reduce(rank==0 ? MPI_IN_PLACE : recvbuf, rank==0 ? recvbuf : NULL,
                   4*recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD);
        MPI_Scatter(recvbuf, recvcount, MPI_INT, rank==0 ? MPI_IN_PLACE : recvbuf, recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
                rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }

    MPI_Finalize();

    return 0;
}
Example #30
int main(int argc, char * argv[])
{
    /* Constant Declarations */
    //long const 	SET_SIZE = 7920;

    /* Variable Declarations */
    int		count = 0;				// local count
    double 	elapsed_time = 0.00;			// time elapsed
    int		first;					// index of first multiple
    int 	global_count = 1;			// global count
    int 	high_value;				// highest value on processor
    char 	hostname[MPI_MAX_PROCESSOR_NAME];	// host process is running on
    int	 	i;					// counter variable
    int 	id;					// process id number
    int		index;
    int 	init_status;			// initialization error status flag
    int  	initialized = 0;		// MPI initialized flag (MPI_Initialized expects an int)
    int 	len;				// hostname length
    int 	low_value;			// lowest value on the processor
    char*	marked;				// portion of 2 to n that is marked
    int		n;			// number of elements to sieve
    int		n_sqrt;			// square root of n
    int 	p;			// number of processes
    int		prime;
    int		proc0_size;		// size of process 0's subarray
    int		size;			// elements in marked
    int*	sqrt_primes;		// primes up to the square root
    int		sqrt_primes_index;	// index in the square root primes array
    char*	sqrt_primes_marked;	// numbers up to sqrt marked prime or not
    int		sqrt_primes_size;	// size of square root primes array

    /* Function Declarations */
    //int is_prime( int );

    /* Initialization */
    MPI_Initialized( &initialized );                     // set initialized flag
    if( !initialized )                                  // if MPI is not initialized
        init_status = MPI_Init( &argc, &argv );        // Initialize MPI
    else
        init_status = MPI_SUCCESS;   	               // otherwise set init_status to success
    if( init_status != MPI_SUCCESS ) {     	       // if not successfully initialized
        printf ("Error starting MPI program. Terminating.\n");      // print error message
        fflush(stdout);
        MPI_Abort(MPI_COMM_WORLD, init_status);                     // abort
    }
    MPI_Get_processor_name( hostname, &len );                       // set hostname

    MPI_Comm_rank( MPI_COMM_WORLD, &id );                           // set process rank
    MPI_Comm_size( MPI_COMM_WORLD, &p );                            // set size of comm group
    //printf("Process rank %d started on %s.\n", id, hostname);     // print start message
    //fflush(stdout);
    //MPI_Barrier(MPI_COMM_WORLD );

    /* Start Timer */
    MPI_Barrier( MPI_COMM_WORLD );                                  // synchronize
    elapsed_time = - MPI_Wtime();                                   // start time

    /* Check that a set size was passed into the program */
    if(argc != 2) {
        if(id==0) {
            printf("Command line: %s <m>\n", argv[0]);
            fflush(stdout);
	}
        MPI_Finalize();
        exit(1);
    }

    n = atoi(argv[1]);
    n_sqrt = ceil(sqrt((double)n));
    //if(id==0)
    //	printf("square root: %i\n", n_sqrt);
    // debug
    //if(id==0) {
	//printf("n sqrt: %i\n", n_sqrt);
   	//fflush(stdout);
    //}

    sqrt_primes_marked = (char *) malloc(n_sqrt + 1);
    sqrt_primes_marked[0] = 1;
    sqrt_primes_marked[1] = 1;

    for(i = 2; i <= n_sqrt; ++i) {
	sqrt_primes_marked[i] = 0;
    }

    prime = 2;
    sqrt_primes_size = n_sqrt;
    //printf("sqrt primes size: %i\n", sqrt_primes_size);

    do {
	for(i = prime * prime; i <= n_sqrt; i+=prime) {
	     sqrt_primes_marked[i] = 1;
	     //sqrt_primes_size--;
	}
	while(sqrt_primes_marked[++prime]);    
    } while (prime * prime <= n_sqrt);
    //printf("sqrt primes size: %i\n", sqrt_primes_size);
    sqrt_primes = (int *) malloc(sqrt_primes_size * sizeof(int));
    sqrt_primes_index = 0;

    //sqrt_primes_size = 0;

    for(i = 3; i <= n_sqrt; ++i) {
	if(!sqrt_primes_marked[i]) {
	    
	    sqrt_primes[sqrt_primes_index] = i;
	   // printf("%i, ", sqrt_primes[sqrt_primes_index]);
	    sqrt_primes_index++;
                
        }
    }

    sqrt_primes_size = sqrt_primes_index;

    //printf("sqrt primes size: %i\n", sqrt_primes_size);
    //fflush(stdout);

    /* Set process's array share and first and last elements */
    low_value = 2 + BLOCK_LOW(id,p,n-1);
    high_value = 2 + BLOCK_HIGH(id,p,n-1);
    size = BLOCK_SIZE(id,p,n-1);

    //printf("Process %i block low: %i\n", id, low_value);
    //fflush(stdout);
    //printf("Process %i block high: %i\n", id, high_value);
    //fflush(stdout);
    //printf("Block size: %i\n", size);
    //fflush(stdout);
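    /* only odd numbers are sieved in marked[], so halve the block size and move low_value/high_value onto odd bounds */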

    if(low_value % 2 == 0) {
	if(high_value % 2 == 0) {
	     size = (int)floor((double)size / 2.0);
	     high_value--;
	}
	else {
	    size = size / 2;
	}
	low_value++;
    }
    else {
	if(high_value % 2 == 0) {
	     size = size / 2;
	     high_value--;
	}
	else {
	     size = (int)ceil((double)size / 2.0);
	}
    }

    //printf("Process %i block low: %i\n", id, low_value);
    //fflush(stdout);
    //printf("Process %i block high: %i\n", id, high_value);
    //fflush(stdout);
    //printf("Block size: %i\n", size);
    //fflush(stdout);

    //proc0_size = (n-1)/p;

    /* if process 0 doesn't have all the primes for sieving, then bail*/
    /*if((2+proc0_size) < (int)sqrt((double)n)) {
        if(id==0) {
            printf("Too many processes\n");
            fflush(stdout);
        }
        MPI_Finalize();
        exit(1);
    }
    */

    /* Allocate share of array */
    marked = (char *) malloc(size);

    if(marked == NULL) {
        printf("Cannot allocate enough memory\n");
        fflush(stdout);
	MPI_Finalize();
        exit(1);
    }

    /* Run Sieve */

    //printf("made it to sieve\n");
    //fflush(stdout);

    for(i = 0; i < size; i++)
	marked[i] = 0;

    if(id==0)
	first = 0;
    
    sqrt_primes_index = 0;
    prime = sqrt_primes[sqrt_primes_index];

    //printf("first prime: %i\n", prime);
    //fflush(stdout);

    //for(i = 0; i < sqrt_primes_size; i++) {

      //              printf("%i,", sqrt_primes[i]);
        //            fflush(stdout);

        //}
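    /* for each sieving prime: locate the index of its first odd multiple in this block, then mark every prime-th entry after it */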
     

    do {
	if(prime >= low_value)
	    first = ((prime - low_value) / 2) + prime;
	else if(prime * prime > low_value) {
		first = (prime * prime - low_value) / 2;
	}
	else {
	    if(low_value % prime == 0)
		first = 0;
	    else {
		first = 1;
		while ((low_value + (2 * first)) % prime != 0)
			++first;
	    }
	}

	//printf("first: %i\n", first);
	//fflush(stdout);

	for(i = first; i < size; i += (prime))
		marked[i] = 1;

	//printf("made it to prime assignment\n");
	prime = sqrt_primes[++sqrt_primes_index];

	//printf("prime: %i\n", prime);
	//fflush(stdout);

    } while(prime * prime <= n && sqrt_primes_index < sqrt_primes_size);

    count = 0;

    for(i = 0; i < size; i++) {
	if(!marked[i])
	    count++;
    }

    //printf("size: %i\ncount: %i\n", size, count);

//    for( i=id; i<SET_SIZE; i+=p )                                                       // interleaved allocation
//        count += is_prime( i );                                                             // check if prime w/ sieve of eratosthenes

    /* Reduce Sum */
    MPI_Reduce( &count, &global_count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );        // reduce the primes count, root: proces 0

    /* Stop Timer */
    elapsed_time += MPI_Wtime();                                                        // end time

    //printf("Process %i found %i primes.\n", id, count);
    //fflush(stdout);

    //printf("Process %d is done in %d, running on %s.\n", id, elapsed_time, hostname);   // print process done message
    if( id == 0 ) {                                                                     // rank 0 prints global count
        printf("There are %d primes in the first %i integers.\nExecution took %10.6f.\n",
               global_count, n, elapsed_time);
	fflush(stdout);
	
//	printf("Debug:\n");
//	fflush(stdout);
//	printf("sqrt primes size: %i\n", sqrt_primes_size);
//        fflush(stdout);
	for(i = 0; i < sqrt_primes_size; i++) {
		if(!sqrt_primes[i]){
		    printf("%i,", sqrt_primes[i]);
		    fflush(stdout);
		}
	}
    }

    MPI_Barrier(MPI_COMM_WORLD);

  //  printf("rank: %i\nlow value: %i\nhigh value: %i\ncount: %i\n", id, low_value, high_value, count);

    //fflush(stdout);
    MPI_Finalize();                                                                     // finalize
    return 0;
}