Example #1
int main(int argc, char *argv[])
{
    int errs = 0;
    int wrank, wsize;
    int periods[1] = { 0 };
    MPI_Comm cart, dgraph, graph;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

#if defined(TEST_NEIGHB_COLL)
    /* a basic test for the 10 (5 patterns x {blocking,nonblocking}) MPI-3
     * neighborhood collective routines */

    /* (wrap)--> 0 <--> 1 <--> ... <--> p-1 <--(wrap) */
    MPI_Cart_create(MPI_COMM_WORLD, 1, &wsize, periods, /*reorder=*/0, &cart);

    /* allgather */
    {
        int sendbuf[1] = { wrank };
        int recvbuf[2] = { 0xdeadbeef, 0xdeadbeef };

        /* should see one send to each neighbor (rank-1 and rank+1) and one receive
         * each from same */
        MPI_Neighbor_allgather(sendbuf, 1, MPI_INT, recvbuf, 1, MPI_INT, cart);

        if (wrank == 0)
            check(recvbuf[0] == 0xdeadbeef);
        else
            check(recvbuf[0] == wrank - 1);

        if (wrank == wsize - 1)
            check(recvbuf[1] == 0xdeadbeef);
        else
            check(recvbuf[1] == wrank + 1);
    }
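    /* A nonblocking variant of the same exchange (not exercised in this excerpt)
     * would look like the following minimal sketch:
     *
     *     MPI_Request req;
     *     MPI_Ineighbor_allgather(sendbuf, 1, MPI_INT, recvbuf, 1, MPI_INT, cart, &req);
     *     MPI_Wait(&req, MPI_STATUS_IGNORE);
     */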

    /* allgatherv */
    {
        int sendbuf[1]    = { wrank };
        int recvbuf[2]    = { 0xdeadbeef, 0xdeadbeef };
        int recvcounts[2] = { 1, 1 };
        int displs[2]     = { 1, 0};

        /* should see one send to each neighbor (rank-1 and rank+1) and one receive
         * each from same, but put them in opposite slots in the buffer */
        MPI_Neighbor_allgatherv(sendbuf, 1, MPI_INT, recvbuf, recvcounts, displs, MPI_INT, cart);

        if (wrank == 0)
            check(recvbuf[1] == 0xdeadbeef);
        else
            check(recvbuf[1] == wrank - 1);

        if (wrank == wsize - 1)
            check(recvbuf[0] == 0xdeadbeef);
        else
            check(recvbuf[0] == wrank + 1);
    }

    /* alltoall */
    {
        int sendbuf[2]    = { -(wrank+1), wrank+1 };
        int recvbuf[2]    = { 0xdeadbeef, 0xdeadbeef };

        /* should see one send to each neighbor (rank-1 and rank+1) and one
         * receive each from same */
        MPI_Neighbor_alltoall(sendbuf, 1, MPI_INT, recvbuf, 1, MPI_INT, cart);

        if (wrank == 0)
            check(recvbuf[0] == 0xdeadbeef);
        else
            check(recvbuf[0] == wrank);

        if (wrank == wsize - 1)
            check(recvbuf[1] == 0xdeadbeef);
        else
            check(recvbuf[1] == -(wrank + 2));
    }

    /* alltoallv */
    {
        int sendbuf[2]    = { -(wrank+1), wrank+1 };
        int recvbuf[2]    = { 0xdeadbeef, 0xdeadbeef };
        int sendcounts[2] = { 1, 1 };
        int recvcounts[2] = { 1, 1 };
        int sdispls[2]    = { 0, 1 };
        int rdispls[2]    = { 1, 0 };

        /* should see one send to each neighbor (rank-1 and rank+1) and one receive
         * each from same, but put them in opposite slots in the buffer */
        MPI_Neighbor_alltoallv(sendbuf, sendcounts, sdispls, MPI_INT,
                               recvbuf, recvcounts, rdispls, MPI_INT,
                               cart);

        if (wrank == 0)
            check(recvbuf[1] == 0xdeadbeef);
        else
            check(recvbuf[1] == wrank);

        if (wrank == wsize - 1)
            check(recvbuf[0] == 0xdeadbeef);
        else
            check(recvbuf[0] == -(wrank + 2));
    }

    /* alltoallw */
    {
        int sendbuf[2]            = { -(wrank+1), wrank+1 };
        int recvbuf[2]            = { 0xdeadbeef, 0xdeadbeef };
        int sendcounts[2]         = { 1, 1 };
        int recvcounts[2]         = { 1, 1 };
        MPI_Aint sdispls[2]       = { 0, sizeof(int) };
        MPI_Aint rdispls[2]       = { sizeof(int), 0 };
        MPI_Datatype sendtypes[2] = { MPI_INT, MPI_INT };
        MPI_Datatype recvtypes[2] = { MPI_INT, MPI_INT };

        /* should see one send to each neighbor (rank-1 and rank+1) and one receive
         * each from same, but put them in opposite slots in the buffer */
        MPI_Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes,
                               recvbuf, recvcounts, rdispls, recvtypes,
                               cart);

        if (wrank == 0)
            check(recvbuf[1] == 0xdeadbeef);
        else
            check(recvbuf[1] == wrank);

        if (wrank == wsize - 1)
            check(recvbuf[0] == 0xdeadbeef);
        else
            check(recvbuf[0] == -(wrank + 2));
    }


    MPI_Comm_free(&cart);
#endif /* defined(TEST_NEIGHB_COLL) */

    MPI_Reduce((wrank == 0 ? MPI_IN_PLACE : &errs), &errs, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (wrank == 0) {
        if (errs) {
            printf("found %d errors\n", errs);
        }
        else {
            printf(" No errors\n");
        }
    }
    MPI_Finalize();

    return 0;
}
Example #2
int main(int argc, char **argv)
{
    int np[2];
    ptrdiff_t n[3];
    ptrdiff_t alloc_local;
    ptrdiff_t local_ni[3], local_i_start[3];
    ptrdiff_t local_no[3], local_o_start[3];
    long double err;
    pfftl_complex *in, *out;
    pfftl_plan plan_forw=NULL, plan_back=NULL;
    MPI_Comm comm_cart_2d;

    /* Set size of FFT and process mesh */
    n[0] = 29;
    n[1] = 27;
    n[2] = 31;
    np[0] = 2;
    np[1] = 2;

    /* Initialize MPI and PFFT */
    MPI_Init(&argc, &argv);
    pfftl_init();

    /* Create two-dimensional process grid of size np[0] x np[1], if possible */
    if( pfftl_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ) {
        pfftl_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
        MPI_Finalize();
        return 1;
    }

    /* Get parameters of data distribution */
    alloc_local = pfftl_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE,
                                          local_ni, local_i_start, local_no, local_o_start);

    /* Allocate memory */
    in  = pfftl_alloc_complex(alloc_local);
    out = pfftl_alloc_complex(alloc_local);

    /* Plan parallel forward FFT */
    plan_forw = pfftl_plan_dft_3d(
                    n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

    /* Plan parallel backward FFT */
    plan_back = pfftl_plan_dft_3d(
                    n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

    /* Initialize input with random numbers */
    pfftl_init_input_complex_3d(n, local_ni, local_i_start,
                                in);

    /* execute parallel forward FFT */
    pfftl_execute(plan_forw);

    /* execute parallel backward FFT */
    pfftl_execute(plan_back);

    /* Scale data: the forward and backward transforms are unnormalized, so divide by n[0]*n[1]*n[2] to recover the input */
    ptrdiff_t l;
    for(l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
        in[l] /= (n[0]*n[1]*n[2]);

    /* Print error of back transformed data */
    err = pfftl_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
    pfftl_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
    pfftl_printf(comm_cart_2d, "maxerror = %6.2Le;\n", err);

    /* free mem and finalize */
    pfftl_destroy_plan(plan_forw);
    pfftl_destroy_plan(plan_back);
    MPI_Comm_free(&comm_cart_2d);
    pfftl_free(in);
    pfftl_free(out);
    MPI_Finalize();
    return 0;
}
Example #3
int main(int argc, char* argv[])
{
  int i, j, loop, num_alive, maxloop;
  int ldboard, ldnbngb, ldlboard;
  double t1, t2;
  double temps;
 
  int *board;
  int *nbngb;

  int local_alive;
  int *global_board;

  struct grid grid;
  MPI_Comm comm;
  int nb_proc_row;
  int nb_proc_tot;
  int rank;
  int nb_in_block;

  MPI_Init(&argc,&argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nb_proc_tot);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // initialization of the grid communicator
  if (EXIT_FAILURE == compute_communicator(nb_proc_tot,&nb_proc_row,&comm,&rank)){
    MPI_Finalize();
    return EXIT_SUCCESS;
  }


  if (argc < 2) {
    maxloop = 10;
  } else if (argc > 2){
    maxloop = atoi(argv[1]);
    BS = atoi(argv[2]);
  } else
    maxloop = atoi(argv[1]);
  num_alive = 0;
  local_alive = 0;

  /* Leading dimension of the board array */
  ldboard = BS;
  if (ldboard % nb_proc_row != 0){
    if (rank == 0)
      printf("Wrong BS (or wrong number of procs) ... exiting now.\n");
    MPI_Finalize();
    return EXIT_FAILURE;
  }

  /* Leading dimension of the neighbour counters array */
  nb_in_block = ldboard / nb_proc_row;
  ldnbngb = nb_in_block;
  ldlboard = nb_in_block + 2;

  board = malloc( ldlboard * ldlboard * sizeof(int) );
  nbngb = malloc( ldnbngb * ldnbngb * sizeof(int) );

  if (rank == 0){
    global_board = malloc( ldboard * ldboard * sizeof(int) );
    num_alive = generate_initial_board( &global_cell( 1, 1), ldboard );
    printf("Starting number of living cells = %d\n", num_alive);
    t1 = mytimer();
  }

  matrix_placement_proc(nb_proc_row, nb_in_block, &comm, &(global_cell( 1, 1)), &(cell( 1, 1)), SCATTER, ldlboard);

  mpi_grid_init(&comm, &grid, rank);
  //printf("rank #%d: %d %d\n", rank, grid.rank_I, grid.rank_J);


  //output_lboard( nb_in_block, board, ldlboard, 0, rank );

  for (loop = 1; loop <= maxloop; loop++) {

    MPI_Datatype blocktype; // a board row is strided in memory (stride = ldlboard), so we need a vector type for the row exchange
    MPI_Type_vector(nb_in_block, 1, ldlboard, MPI_INT, &blocktype);
    MPI_Type_commit(&blocktype);
    // for upper/lower ghost row
    MPI_Sendrecv(&(cell( 1, 1)), 1, blocktype, grid.proc_above, 99, 
		 &(cell( nb_in_block+1, 1)), 1, blocktype, grid.proc_under, 99,
		 comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&(cell( nb_in_block, 1)), 1, blocktype, grid.proc_under, 99,
		 &(cell( 0, 1)), 1, blocktype, grid.proc_above, 99,
		 comm, MPI_STATUS_IGNORE);
    MPI_Type_free(&blocktype); // the row type is re-created every iteration, so free it after its last use to avoid a leak

    // for left/right ghost col
    MPI_Sendrecv(&(cell( 0, 1)), ldlboard, MPI_INT, grid.proc_left, 98, 
		 &(cell( 0, nb_in_block+1)), ldlboard, MPI_INT, grid.proc_right, 98,
		 comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&(cell( 0, nb_in_block)), ldlboard, MPI_INT, grid.proc_right, 98,
		 &(cell( 0, 0)), ldlboard, MPI_INT, grid.proc_left, 98, 
		 comm, MPI_STATUS_IGNORE);

    //debug
    /* if (loop == 1) */
    /*   output_lboard( nb_in_block, board, ldlboard, 0, rank ); */

    // count each cell's neighbours
    for (j = 1; j <= nb_in_block; j++) {
      for (i = 1; i <= nb_in_block; i++) {
  	ngb( i, j ) =
  	  cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) +
  	  cell( i-1, j   ) +                  cell( i+1, j   ) +
  	  cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 );
      }
    }

    // update the matrix
    local_alive = 0;
    for (j = 1; j <= nb_in_block; j++) {
      for (i = 1; i <= nb_in_block; i++) {
  	if ( (ngb( i, j ) < 2) ||
  	     (ngb( i, j ) > 3) ) {
  	  cell(i, j) = 0;
  	}
  	else {
  	  if ((ngb( i, j )) == 3)
  	    cell(i, j) = 1;
  	}
  	if (cell(i, j) == 1) {
  	  local_alive ++;
  	}
      }
    }

    //output_lboard( nb_in_block, board, ldlboard, loop, rank );
#ifdef PRINT_ALIVE
    MPI_Reduce(&local_alive, &num_alive, 1, MPI_INT, MPI_SUM, 0, comm);
    if (rank == 0)
      printf("%d \n", num_alive);
#endif
  }

  matrix_placement_proc(nb_proc_row, nb_in_block, &comm, &(cell( 1, 1)), &(global_cell( 1, 1)), GATHER, ldlboard);
  MPI_Reduce(&local_alive, &num_alive, 1, MPI_INT, MPI_SUM, 0, comm);

  if (rank == 0){
    t2 = mytimer();
    temps = t2 - t1;
    printf("Final number of living cells = %d\n", num_alive);
    printf("time=%.2lf ms\n",(double)temps * 1.e3);
    
    //output_board( BS, &(global_cell(1, 1)), ldboard, maxloop);
    free(global_board);
  }
  free(board);
  free(nbngb);

  MPI_Comm_free(&comm);
  MPI_Finalize();

  return EXIT_SUCCESS;
}
Example #4
/*
    Check that the MPI implementation properly handles zero-dimensional
    Cartesian communicators - the original standard implies that these
    should be consistent with higher dimensional topologies and thus
    these should work with any MPI implementation.  MPI 2.1 made this
    requirement explicit.
*/
int main(int argc, char *argv[])
{
    int errs = 0;
    int size, rank, ndims;
    MPI_Comm comm, newcomm;

    MTest_Init(&argc, &argv);

    /* Create a new cartesian communicator in a subset of the processes */
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (size < 2) {
        fprintf(stderr, "This test needs at least 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Cart_create(MPI_COMM_WORLD, 0, NULL, NULL, 0, &comm);

    if (comm != MPI_COMM_NULL) {
        int csize;
        MPI_Comm_size(comm, &csize);
        if (csize != 1) {
            errs++;
            fprintf(stderr, "Sizes is wrong in cart communicator.  Is %d, should be 1\n", csize);
        }

        /* This function is not meaningful, but should not fail */
        MPI_Dims_create(1, 0, NULL);

        ndims = -1;
        MPI_Cartdim_get(comm, &ndims);
        if (ndims != 0) {
            errs++;
            fprintf(stderr, "MPI_Cartdim_get: ndims is %d, should be 0\n", ndims);
        }

        /* this function should not fail */
        MPI_Cart_get(comm, 0, NULL, NULL, NULL);

        MPI_Cart_rank(comm, NULL, &rank);
        if (rank != 0) {
            errs++;
            fprintf(stderr, "MPI_Cart_rank: rank is %d, should be 0\n", rank);
        }

        /* this function should not fail */
        MPI_Cart_coords(comm, 0, 0, NULL);

        MPI_Cart_sub(comm, NULL, &newcomm);
        ndims = -1;
        MPI_Cartdim_get(newcomm, &ndims);
        if (ndims != 0) {
            errs++;
            fprintf(stderr, "MPI_Cart_sub did not return zero-dimensional communicator\n");
        }

        MPI_Barrier(comm);

        MPI_Comm_free(&comm);
        MPI_Comm_free(&newcomm);
    }
    else if (rank == 0) {
        errs++;
        fprintf(stderr, "Communicator returned is null!");
    }

    MTest_Finalize(errs);

    MPI_Finalize();

    return 0;
}
Example #5
int main(int argc, char *argv[])
{
    int provided, wrank, wsize, nmsg, i, tag;
    int *(buf[MAX_TARGETS]), bufsize[MAX_TARGETS];
    MPI_Request r[MAX_TARGETS];
    MPI_Comm commDup, commEven;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    if (wsize < 4) {
        fprintf(stderr, "This test requires at least 4 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Create several communicators */
    MPI_Comm_dup(MPI_COMM_WORLD, &commDup);
    MPI_Comm_set_name(commDup, "User dup of comm world");

    MPI_Comm_split(MPI_COMM_WORLD, wrank & 0x1, wrank, &commEven);
    if (wrank & 0x1)
        MPI_Comm_free(&commEven);
    else
        MPI_Comm_set_name(commEven, "User split to even ranks");

    /* Create a collection of pending sends and receives
     * We use tags on the sends and receives (when ANY_TAG isn't used)
     * to provide an easy way to check that the proper requests are present.
     * TAG values use fields, in decimal (for easy reading):
     * 0-99: send/recv type:
     * 0 - other
     * 1 - irecv
     * 2 - isend
     * 3 - issend
     * 4 - ibsend
     * 5 - irsend
     * 6 - persistent recv
     * 7 - persistent send
     * 8 - persistent ssend
     * 9 - persistent rsend
     * 10 - persistent bsend
     * 100-999: destination (for send) or source, if receive.  999 = any-source
     * (rank is value/100)
     * 1000-2G: other values
     */
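    /* For example, tag 102 = 2 + 1*100 marks an isend involving rank 1; a checker
     * can recover the operation type as tag % 100 and the peer rank as tag / 100
     * (999 meaning any-source). */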
    /* Create the send/receive buffers */
    nmsg = 10;
    for (i = 0; i < nmsg; i++) {
        bufsize[i] = i;
        if (i) {
            buf[i] = (int *) calloc(bufsize[i], sizeof(int));
            if (!buf[i]) {
                fprintf(stderr, "Unable to allocate %d words\n", bufsize[i]);
                MPI_Abort(MPI_COMM_WORLD, 2);
            }
        } else
            buf[i] = 0;
    }

    /* Partial implementation */
    if (wrank == 0) {
        nmsg = 0;
        tag = 2 + 1 * 100;
        MPI_Isend(buf[0], bufsize[0], MPI_INT, 1, tag, MPI_COMM_WORLD, &r[nmsg++]);
        tag = 3 + 2 * 100;
        MPI_Issend(buf[1], bufsize[1], MPI_INT, 2, tag, MPI_COMM_WORLD, &r[nmsg++]);
        tag = 1 + 3 * 100;
        MPI_Irecv(buf[2], bufsize[2], MPI_INT, 3, tag, MPI_COMM_WORLD, &r[nmsg++]);
    } else if (wrank == 1) {
    } else if (wrank == 2) {
    } else if (wrank == 3) {
    }

    /* provide a convenient place to wait */
    MPI_Barrier(MPI_COMM_WORLD);
    printf("Barrier 1 finished\n");

    /* Match up (or cancel) the requests */
    if (wrank == 0) {
        MPI_Waitall(nmsg, r, MPI_STATUSES_IGNORE);
    } else if (wrank == 1) {
        tag = 2 + 1 * 100;
        MPI_Recv(buf[0], bufsize[0], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } else if (wrank == 2) {
        tag = 3 + 2 * 100;
        MPI_Recv(buf[1], bufsize[1], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } else if (wrank == 3) {
        tag = 1 + 3 * 100;
        MPI_Send(buf[2], bufsize[2], MPI_INT, 0, tag, MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    printf("Barrier 2 finished\n");

    MPI_Comm_free(&commDup);
    if (commEven != MPI_COMM_NULL)
        MPI_Comm_free(&commEven);

    MPI_Finalize();
    return 0;
}
Example #6
int main(int argc, char **argv)
{
    int i, rank, nproc;
    int shm_rank, shm_nproc;
    MPI_Aint size;
    int errors = 0, all_errors = 0;
    int **bases = NULL, *my_base = NULL;
    int disp_unit;
    MPI_Win shm_win = MPI_WIN_NULL, win = MPI_WIN_NULL;
    MPI_Comm shm_comm = MPI_COMM_NULL;
    MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL;
    int dst_shm_rank, dst_world_rank;
    MPI_Info create_info = MPI_INFO_NULL;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);

    MPI_Comm_rank(shm_comm, &shm_rank);
    MPI_Comm_size(shm_comm, &shm_nproc);

    /* Platform does not support shared memory, just return. */
    if (shm_nproc < 2) {
        goto exit;
    }

    /* Specify the last process in the node as the target process */
    dst_shm_rank = shm_nproc - 1;
    MPI_Comm_group(shm_comm, &shm_group);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_translate_ranks(shm_group, 1, &dst_shm_rank, world_group, &dst_world_rank);

    bases = calloc(shm_nproc, sizeof(int *));

    /* Allocate shm window among local processes, then create a global window with
     * those shm window buffers */
    MPI_Win_allocate_shared(sizeof(int) * ELEM_PER_PROC, sizeof(int), MPI_INFO_NULL,
                            shm_comm, &my_base, &shm_win);
    if (verbose)
        printf("%d -- allocate shared: my_base = %p, absolute base\n", shm_rank, my_base);

    for (i = 0; i < shm_nproc; i++) {
        MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &bases[i]);
        if (verbose)
            printf("%d --    shared query: base[%d]=%p, size %ld, unit %d\n",
                   shm_rank, i, bases[i], size, disp_unit);
    }

#ifdef USE_INFO_ALLOC_SHM
    MPI_Info_create(&create_info);
    MPI_Info_set(create_info, "alloc_shm", "true");
#else
    create_info = MPI_INFO_NULL;
#endif

    MPI_Win_create(my_base, sizeof(int) * ELEM_PER_PROC, sizeof(int), create_info, MPI_COMM_WORLD,
                   &win);

    /* Reset data */
    for (i = 0; i < ELEM_PER_PROC; i++) {
        my_base[i] = 0;
        local_buf[i] = i + 1;
    }

    /* Do RMA through global window, then check value through shared window */
    MPI_Win_lock_all(0, win);
    MPI_Win_lock_all(0, shm_win);

    if (shm_rank == 0) {
        MPI_Put(&local_buf[0], 1, MPI_INT, dst_world_rank, 0, 1, MPI_INT, win);
        MPI_Put(&local_buf[ELEM_PER_PROC - 1], 1, MPI_INT, dst_world_rank, ELEM_PER_PROC - 1, 1,
                MPI_INT, win);
        MPI_Win_flush(dst_world_rank, win);
    }

    MPI_Win_sync(shm_win);
    MPI_Barrier(shm_comm);
    MPI_Win_sync(shm_win);
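    /* The sync/barrier/sync sequence makes rank 0's puts (already flushed through
     * the global window) visible through the shared-memory window: MPI_Win_sync
     * synchronizes the public and private copies of shm_win, and the barrier
     * orders the update before the other processes read bases[dst_shm_rank]. */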

    if (bases[dst_shm_rank][0] != local_buf[0]) {
        errors++;
        printf("%d -- Got %d at rank %d index %d, expected %d\n", rank,
               bases[dst_shm_rank][0], dst_shm_rank, 0, local_buf[0]);
    }
    if (bases[dst_shm_rank][ELEM_PER_PROC - 1] != local_buf[ELEM_PER_PROC - 1]) {
        errors++;
        printf("%d -- Got %d at rank %d index %d, expected %d\n", rank,
               bases[dst_shm_rank][ELEM_PER_PROC - 1], dst_shm_rank,
               ELEM_PER_PROC - 1, local_buf[ELEM_PER_PROC - 1]);
    }

    MPI_Win_unlock_all(shm_win);
    MPI_Win_unlock_all(win);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    MPI_Win_free(&win);
    MPI_Win_free(&shm_win);

  exit:
    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    if (create_info != MPI_INFO_NULL)
        MPI_Info_free(&create_info);
    if (shm_comm != MPI_COMM_NULL)
        MPI_Comm_free(&shm_comm);
    if (shm_group != MPI_GROUP_NULL)
        MPI_Group_free(&shm_group);
    if (world_group != MPI_GROUP_NULL)
        MPI_Group_free(&world_group);

    MPI_Finalize();

    if (bases)
        free(bases);

    return 0;
}
Example #7
int main( int argc, char **argv ) {
    int rank, size, i,j, r;
    //int table[MAXN][MAXN];
    int newtable[MAXN * MAXN];
    int row[MAXN];
    int ranks[MAXN];
    int last;
    int v;
    int N = 5;

    MPI_Datatype subarray;

    int array_size[] ={N};
    int array_subsize[] = {N};
    int array_start[] = {0};

    MPI_Comm  COMM_LAST;
    MPI_Group group_world, group_last;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    MPI_Type_create_subarray(1, array_size, array_subsize, array_start, MPI_ORDER_C, MPI_INT, &subarray);
    MPI_Type_commit(&subarray);
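    /* Note: a 1-D subarray covering the full extent with start 0 is simply N
     * contiguous MPI_INTs, i.e. one row of the N x N table, so scattering
     * "1, subarray" hands one row to each process. */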

    last = N % size;

    /* If I'm the root (process 0), then fill out the big table */
    if (rank == 0) {
        int k = 0;
        for ( i=0; i<N; i++) 
            for ( j=0; j<N; j++ ) 
                newtable[i*N + j] = ++k;
    } else {
        for ( i=0; i<N; i++)  {
            for ( j=0; j<N; j++ ) 
                newtable[i * N + j] = 0;
            row[i] = 0;
        }
    }
    printf("@[%d] Initialize..\n", rank);
    MPI_Barrier(MPI_COMM_WORLD);
 
    for(r = 0; r < size; r++) {
        MPI_Barrier(MPI_COMM_WORLD);
        if(rank == r) {
            printf("@[%d] X-> \n", rank);
            for(i=0; i<N; i++)
                print_row("Xnewtable",newtable,i, N, rank);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

/*    if(!rank)
        for(i=0; i<N; i++)
            print_row("newtable", newtable,i, N, rank);*/

    for(i=0; i <last; i++) ranks[i] = i;

    MPI_Comm_group(MPI_COMM_WORLD, &group_world);
    MPI_Group_incl(group_world, last, ranks, &group_last);
    MPI_Comm_create(MPI_COMM_WORLD, group_last, &COMM_LAST);

    for(i = 0; i < N; i+= size ) {
        if(!rank)printf("scatter <%d>\n", i);
        if (i < (N - last)) {
            MPI_Scatter(
                    &newtable[i * N + 0], 1, subarray,
                    &newtable[(i + rank) * N + 0], 1, subarray,
                    0,MPI_COMM_WORLD);
        } else {
            if (rank < last) {
                MPI_Scatter(
                        &newtable[i * N + 0], 1, subarray,
                        &newtable[ (i + rank) * N + 0], 1, subarray,
                        0, COMM_LAST);
            }
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);
    
    for(r = 0; r < size; r++) {
        MPI_Barrier(MPI_COMM_WORLD);
        if(rank == r) {
            printf("@[%d] -> \n", rank);
            for(i=0; i<N; i++)
                print_row("Xnewtable",newtable,i, N, rank);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    printf("@[%d] ......\n", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // reset our little matrix
    if (rank == 0) {
        int k = 0;
        for ( i=0; i<N; i++) 
            for ( j=0; j<N; j++ ) 
                newtable[i * N  + j] = 0;
        for(r = 0; r < size; r++) {
            printf("@[%d]\n", rank);
            if(rank == r) {
                for(i=0; i<N; i++)
                    print_row("newtable",newtable, i, N, rank);
            }
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);

    for(i = 0; i < N; i+= size ) {
        if(!rank)printf("gather <%d>\n", i);
        if (i < (N - last)) {
            MPI_Gather(
                    &newtable[ ( i + rank ) * N + 0], 1, subarray,
                    &newtable[i * N + 0], 1, subarray,
                    0, MPI_COMM_WORLD);
        } else {
            if (rank < last) {
                MPI_Gather(
                        &newtable[(i + rank) * N + 0], 1, subarray,
                        &newtable[i*N + 0], 1, subarray,
                        0,COMM_LAST);
            }
        }
    }
    if (rank == 0) {
        for(r = 0; r < size; r++) {
            printf("@[%d]\n", rank);
            if(rank == r) {
                for(i=0; i<N; i++)
                    print_row("newtable",newtable,i, N, rank);
            }
        }
    }

    MPI_Group_free(&group_last);
    if (rank < last) 
        MPI_Comm_free(&COMM_LAST);
    MPI_Finalize();
    return 0;
}
Example #8
void gmx_setup_nodecomm(FILE *fplog, t_commrec *cr)
{
    gmx_nodecomm_t *nc;
    int             n, rank, hostnum, ng, ni;

    /* Many MPI implementations do not optimize MPI_Allreduce
     * (and probably also other global communication calls)
     * for multi-core nodes connected by a network.
     * We can optimize such communication by using one MPI call
     * within each node and one between the nodes.
     * For MVAPICH2 and Intel MPI this reduces the time for
     * the global_stat communication by 25%
     * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
     * B. Hess, November 2007
     */

    nc = &cr->nc;

    nc->bUse = FALSE;
#ifndef GMX_THREAD_MPI
#ifdef GMX_MPI
    MPI_Comm_size(cr->mpi_comm_mygroup, &n);
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);

    hostnum = gmx_hostname_num();

    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
    }


    /* The intra-node communicator, split on node number */
    MPI_Comm_split(cr->mpi_comm_mygroup, hostnum, rank, &nc->comm_intra);
    MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: node rank %d rank_intra %d\n",
                rank, nc->rank_intra);
    }
    /* The inter-node communicator, split on rank_intra.
     * We actually only need the one for rank=0,
     * but it is easier to create them all.
     */
    MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
    /* Check if this really created two step communication */
    MPI_Comm_size(nc->comm_inter, &ng);
    MPI_Comm_size(nc->comm_intra, &ni);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
                ng, ni);
    }

    if (getenv("GMX_NO_NODECOMM") == NULL &&
        ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
    {
        nc->bUse = TRUE;
        if (fplog)
        {
            fprintf(fplog, "Using two step summing over %d groups of on average %.1f processes\n\n",
                    ng, (real)n/(real)ng);
        }
        if (nc->rank_intra > 0)
        {
            MPI_Comm_free(&nc->comm_inter);
        }
    }
    else
    {
        /* One group or all processes in a separate group, use normal summing */
        MPI_Comm_free(&nc->comm_inter);
        MPI_Comm_free(&nc->comm_intra);
        if (debug)
        {
            fprintf(debug, "In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
        }
    }
#endif
#else
    /* tMPI runs only on a single node so just use the nodeid */
    nc->rank_intra = cr->nodeid;
#endif
}
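
For reference, a minimal sketch (not part of the original source; the helper name two_step_sum_d is hypothetical) of the two-step sum these communicators enable, assuming the nc->comm_intra / nc->comm_inter layout prepared above, where only ranks with rank_intra == 0 keep comm_inter:

/* Illustrative only: sum "count" doubles across all ranks using the
 * intra-/inter-node communicators set up by gmx_setup_nodecomm(). */
static void two_step_sum_d(double *buf, int count, const gmx_nodecomm_t *nc)
{
    if (nc->rank_intra == 0)
    {
        /* Step 1: sum onto the node master within each node */
        MPI_Reduce(MPI_IN_PLACE, buf, count, MPI_DOUBLE, MPI_SUM, 0, nc->comm_intra);
        /* Step 2: combine the node masters across nodes */
        MPI_Allreduce(MPI_IN_PLACE, buf, count, MPI_DOUBLE, MPI_SUM, nc->comm_inter);
    }
    else
    {
        /* Non-masters only contribute to the intra-node reduction */
        MPI_Reduce(buf, NULL, count, MPI_DOUBLE, MPI_SUM, 0, nc->comm_intra);
    }
    /* Broadcast the combined result to every rank on the node */
    MPI_Bcast(buf, count, MPI_DOUBLE, 0, nc->comm_intra);
}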
Example #9
File: com.c Project: 8l/insieme
void
runTest ( TESTPARAMS * testParams )
{
  int procIdx, procs, messIdx;
  MPI_Comm activeComm = MPI_COMM_NULL;
  int *messList, messListSize;
  int testCount, iters;
  unsigned int i;
  char buf[256];
  int width;
  double result;
  STATSTRUCT dp, sum;

  prestaRankPrint ( 0,
                    "\n\n%s Test Results \n(tasks, size, ops/sample, samples) : min/mean/max \n",
                    testParams->name );

  if ( argStruct.verbose )
  {
    if ( testParams->id == LATEN )
    {
      if ( argStruct.sumLocalBW == 1 )
      {
        sprintf ( buf, outputCharLBWFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "Latency (us)" );
      }
      else
      {
        sprintf ( buf, outputCharFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)", "Latency (us)" );
      }
    }
    else
    {
      if ( argStruct.sumLocalBW == 1 )
      {
        sprintf ( buf, outputCharLBWFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)" );
      }
      else
      {
        sprintf ( buf, outputCharFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)", "Op time (us)" );
      }
    }
    prestaRankPrint ( 0, "%s", buf );
    width = strlen ( buf );
  }
  else
    width = 80;

  for ( i = 0; i < width; i++ )
    buf[i] = '-';
  buf[i] = '\0';

  prestaRankPrint ( 0, "%s\n", buf );

  messList = testParams->messList;
  messListSize = testParams->messListSize;

  testParams->maxBW = 0.0;
  testParams->maxBWMessSize = 0;

  init_stats ( &sum, 0, 0, 0, 0 );

  for ( procIdx = 0; procIdx < argStruct.procListSize; procIdx++ )
  {
    procs = argStruct.procList[procIdx];
    if ( procs > wsize )
      procs = wsize;

    /*  Create Communicator of all active processes  */
    procs = createActiveComm ( procs, argStruct.procsPerNode,
                               argStruct.allocPattern,
                               argStruct.useNearestRank, &activeComm );

    prestaDebug ( "rank %d returned from createActiveCom\n", rank );
    prestaDebug ( "messListSize is %d \n", messListSize );

    for ( messIdx = 0; messIdx < messListSize; messIdx++ )
    {
      if ( argStruct.iterList != NULL && argStruct.iterList[messIdx] != 0 )
        iters = argStruct.iterList[messIdx];
      else
        iters = argStruct.iters;

      if ( argStruct.testCountList != NULL )
      {
        fprintf ( stderr,
                  "Before init_stats! messIdx is %d, procIdx is %d, testCountList is %p\n",
                  messIdx, procIdx, argStruct.testCountList );
        init_stats ( &dp, procs, messList[messIdx], iters,
                     argStruct.testCountList[procIdx] );
      }
      else
      {
        init_stats ( &dp, procs, messList[messIdx], iters, argStruct.samples );
      }

      for ( testCount = 0; testCount < dp.samples; testCount++ )
      {
        /*  Run test and save current result  */
        testParams->rankResult =
          testParams->testFunc ( dp.msize, iters, &activeComm );

        /*  TODO : Remove this if unnecessary   */
        if ( testParams->rankResult < minTime )
          minTime = testParams->rankResult;

        if ( !generateResults
             ( testParams, procs, dp.msize, iters, &result ) )
          prestaAbort ( "Failed to generate test results." );

        update_stats ( &dp, result );
        update_stats ( &sum, result );
      }

      if ( testParams->id == LATEN )
      {
        prestaRankPrint ( 0,
                          "(%6d, %9d, %6d, %6d):  %6.3f / %6.3f / %6.3f\n",
                          dp.tasks, dp.msize, dp.iters, dp.samples, dp.min,
                          dp.mean, dp.max );
      }
      else
      {
        prestaRankPrint ( 0,
                          "(%6d, %7d, %5d, %5d):  %12.3f / %12.3f / %12.3f\n",
                          dp.tasks, dp.msize, dp.iters, dp.samples, dp.min,
                          dp.mean, dp.max );
      }
    }
  }

  if ( testParams->id == LATEN )
  {
    prestaRankPrint ( 0,
                      "\nSummary  :             min/mean/max = %6.3f / %6.3f / %6.3f\n",
                      sum.min, sum.mean, sum.max );
  }
  else
  {
    prestaRankPrint ( 0,
                      "\nSummary  :         min/mean/max = %12.3f / %12.3f / %12.3f\n",
                      sum.min, sum.mean, sum.max );
  }

  if ( rank == 0 && argStruct.printPairs )
  {
    printActivePairs ( procs, argStruct.procsPerNode,
                       argStruct.allocPattern, argStruct.useNearestRank );
  }

  if ( activeComm != MPI_COMM_NULL )
    MPI_Comm_free ( &activeComm );
}
Example #10
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph on all the processors and proceeds
* with the balancing step.
**************************************************************************/
void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
    int i, j, mype, npes, nvtxs, nedges, ncon;
    idxtype *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
    idxtype *part, *lwhere, *home;
    GraphType *agraph, cgraph;
    CtrlType myctrl;
    int lnparts, fpart, fpe, lnpes, ngroups, srnpes, srmype;
    int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut;
    int sr_pe, gd_pe, sr, gd, who_wins, *rcounts, *rdispls;
    float my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
    float rating, max_rating, your_cost = -1.0, your_balance = -1.0;
    float lbvec[MAXNCON], lbsum, min_lbsum, *mytpwgts, mytpwgts2[2], buffer[2];
    MPI_Status status;
    MPI_Comm ipcomm, srcomm;
    struct {
        float cost;
        int rank;
    } lpecost, gpecost;

    IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

    vtxdist = graph->vtxdist;
    agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
    nvtxs = cgraph.nvtxs = agraph->nvtxs;
    nedges = cgraph.nedges = agraph->nedges;
    ncon = cgraph.ncon = agraph->ncon;

    xadj = cgraph.xadj = idxmalloc(nvtxs*(5+ncon)+1+nedges*2, "U_IP: xadj");
    vwgt = cgraph.vwgt = xadj + nvtxs+1;
    vsize = cgraph.vsize = xadj + nvtxs*(1+ncon)+1;
    cgraph.where = agraph->where = part = xadj + nvtxs*(2+ncon)+1;
    lwhere = xadj + nvtxs*(3+ncon)+1;
    home = xadj + nvtxs*(4+ncon)+1;
    adjncy = cgraph.adjncy = xadj + nvtxs*(5+ncon)+1;
    adjwgt = cgraph.adjwgt = xadj + nvtxs*(5+ncon)+1 + nedges;

    /* ADD: this assumes that tpwgts for all constraints is the same */
    /* ADD: this is necessary because serial metis does not support the general case */
    mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
    for (i=0; i<ctrl->nparts; i++)
        for (j=0; j<ncon; j++)
            mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
    for (i=0; i<ctrl->nparts; i++)
        mytpwgts[i] /= (float)ncon;

    idxcopy(nvtxs+1, agraph->xadj, xadj);
    idxcopy(nvtxs*ncon, agraph->vwgt, vwgt);
    idxcopy(nvtxs, agraph->vsize, vsize);
    idxcopy(nedges, agraph->adjncy, adjncy);
    idxcopy(nedges, agraph->adjwgt, adjwgt);

    /****************************************/
    /****************************************/
    if (ctrl->ps_relation == DISCOUPLED) {
        rcounts = imalloc(ctrl->npes, "rcounts");
        rdispls = imalloc(ctrl->npes+1, "rdispls");

        for (i=0; i<ctrl->npes; i++) {
            rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
        }
        MAKECSR(i, ctrl->npes, rdispls);

        MPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_DATATYPE,
                       (void *)part, rcounts, rdispls, IDX_DATATYPE, ctrl->comm);

        for (i=0; i<agraph->nvtxs; i++)
            home[i] = part[i];

        GKfree((void **)&rcounts, (void **)&rdispls, LTERM);
    }
    else {
        for (i=0; i<ctrl->npes; i++)
            for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
                part[j] = home[j] = i;
    }

    /* Ensure that the initial partitioning is legal */
    for (i=0; i<agraph->nvtxs; i++) {
        if (part[i] >= ctrl->nparts)
            part[i] = home[i] = part[i] % ctrl->nparts;
        if (part[i] < 0)
            part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
    }
    /****************************************/
    /****************************************/

    IFSET(ctrl->dbglvl, DBG_REFINEINFO, Mc_ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "input cut: %d, balance: ", ComputeSerialEdgeCut(agraph)));
    for (i=0; i<agraph->ncon; i++)
        IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3f ", lbvec[i]));
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

    /****************************************/
    /* Split the processors into two groups */
    /****************************************/
    sr = (ctrl->mype % 2 == 0) ? 1 : 0;
    gd = (ctrl->mype % 2 == 1) ? 1 : 0;

    if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) {
        sr = 1;
        gd = 0;
    }

    sr_pe = 0;
    gd_pe = 1;

    MPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
    MPI_Comm_rank(ipcomm, &mype);
    MPI_Comm_size(ipcomm, &npes);

    myctrl.dbglvl = 0;
    myctrl.mype = mype;
    myctrl.npes = npes;
    myctrl.comm = ipcomm;
    myctrl.sync = ctrl->sync;
    myctrl.seed = ctrl->seed;
    myctrl.nparts = ctrl->nparts;
    myctrl.ipc_factor = ctrl->ipc_factor;
    myctrl.redist_factor = ctrl->redist_base;
    myctrl.partType = ADAPTIVE_PARTITION;
    myctrl.ps_relation = DISCOUPLED;
    myctrl.tpwgts = ctrl->tpwgts;
    icopy(ncon, ctrl->tvwgts, myctrl.tvwgts);
    icopy(ncon, ctrl->ubvec, myctrl.ubvec);

    if (sr == 1) {
        /*******************************************/
        /* Half of the processors do scratch-remap */
        /*******************************************/
        ngroups = amax(amin(RIP_SPLIT_FACTOR, npes), 1);
        MPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
        MPI_Comm_rank(srcomm, &srmype);
        MPI_Comm_size(srcomm, &srnpes);

        moptions[0] = 0;
        moptions[7] = ctrl->sync + (mype % ngroups) + 1;

        idxset(nvtxs, 0, lwhere);
        lnparts = ctrl->nparts;
        fpart = fpe = 0;
        lnpes = srnpes;
        while (lnpes > 1 && lnparts > 1) {
            ASSERT(ctrl, agraph->nvtxs > 1);
            /* Determine the weights of the partitions */
            mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart);
            mytpwgts2[1] = 1.0-mytpwgts2[0];


            if (agraph->ncon == 1) {
                METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                                      agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut,
                                      part);
            }
            else {
                METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2,
                                            moptions, &edgecut, part);
            }

            wsum = ssum(lnparts/2, mytpwgts+fpart);
            sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart);
            sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2);

            /* I'm picking the left branch */
            if (srmype < fpe+lnpes/2) {
                Mc_KeepPart(agraph, wspace, part, 0);
                lnpes = lnpes/2;
                lnparts = lnparts/2;
            }
            else {
                Mc_KeepPart(agraph, wspace, part, 1);
                fpart = fpart + lnparts/2;
                fpe = fpe + lnpes/2;
                lnpes = lnpes - lnpes/2;
                lnparts = lnparts - lnparts/2;
            }
        }

        /* In case srnpes is greater than or equal to nparts */
        if (lnparts == 1) {
            /* Only the first process will assign labels (for the reduction to work) */
            if (srmype == fpe) {
                for (i=0; i<agraph->nvtxs; i++)
                    lwhere[agraph->label[i]] = fpart;
            }
        }
        /* In case srnpes is smaller than nparts */
        else {
            if (ncon == 1)
                METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                                      agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions,
                                      &edgecut, part);
            else
                METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart,
                                            moptions, &edgecut, part);

            for (i=0; i<agraph->nvtxs; i++)
                lwhere[agraph->label[i]] = fpart + part[i];
        }

        MPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_DATATYPE, MPI_SUM, srcomm);

        edgecut = ComputeSerialEdgeCut(&cgraph);
        Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
        lbsum = ssum(ncon, lbvec);
        MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ipcomm);
        MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);
        lpecost.rank = ctrl->mype;
        lpecost.cost = lbsum;
        if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
            if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                lpecost.cost = (float)edgecut;
            else
                lpecost.cost = (float)max_cut + lbsum;
        }
        MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

        if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) {
            MPI_Send((void *)part, nvtxs, IDX_DATATYPE, sr_pe, 1, ctrl->comm);
        }

        if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) {
            MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);
        }

        if (ctrl->mype == sr_pe) {
            idxcopy(nvtxs, part, lwhere);
            SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
        }

        MPI_Comm_free(&srcomm);
    }
    /**************************************/
    /* The other half do global diffusion */
    /**************************************/
    else {
        /******************************************************************/
        /* The next stmt is required to balance out the sr MPI_Comm_split */
        /******************************************************************/
        MPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm);
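        /* (MPI_Comm_split is collective over ipcomm, so every member must call it;
         * with color MPI_UNDEFINED this call simply returns MPI_COMM_NULL.) */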

        if (ncon == 1) {
            rating = WavefrontDiffusion(&myctrl, agraph, home);
            Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
            lbsum = ssum(ncon, lbvec);

            /* Determine which PE computed the best partitioning */
            MPI_Allreduce((void *)&rating, (void *)&max_rating, 1, MPI_FLOAT, MPI_MAX, ipcomm);
            MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);

            lpecost.rank = ctrl->mype;
            lpecost.cost = lbsum;
            if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
                if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                    lpecost.cost = rating;
                else
                    lpecost.cost = max_rating + lbsum;
            }

            MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

            /* Now send this to the coordinating processor */
            if (ctrl->mype == gpecost.rank && ctrl->mype != gd_pe)
                MPI_Send((void *)part, nvtxs, IDX_DATATYPE, gd_pe, 1, ctrl->comm);

            if (ctrl->mype != gpecost.rank && ctrl->mype == gd_pe)
                MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);

            if (ctrl->mype == gd_pe) {
                idxcopy(nvtxs, part, lwhere);
                SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
            }
        }
        else {
            Mc_Diffusion(&myctrl, agraph, graph->vtxdist, agraph->where, home, wspace, N_MOC_GD_PASSES);
        }
    }

    if (graph->ncon <= MAX_NCON_FOR_DIFFUSION) {
        if (ctrl->mype == sr_pe  || ctrl->mype == gd_pe) {
            /********************************************************************/
            /* The coordinators from each group decide on the best partitioning */
            /********************************************************************/
            my_cut = (float) ComputeSerialEdgeCut(&cgraph);
            my_totalv = (float) Mc_ComputeSerialTotalV(&cgraph, home);
            Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
            my_balance = ssum(cgraph.ncon, lbvec);
            my_balance /= (float) cgraph.ncon;
            my_cost = ctrl->ipc_factor * my_cut + REDIST_WGT * ctrl->redist_base * my_totalv;

            IFSET(ctrl->dbglvl, DBG_REFINEINFO, printf("%s initial cut: %.1f, totalv: %.1f, balance: %.3f\n",
                    (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"), my_cut, my_totalv, my_balance));

            if (ctrl->mype == gd_pe) {
                buffer[0] = my_cost;
                buffer[1] = my_balance;
                MPI_Send((void *)buffer, 2, MPI_FLOAT, sr_pe, 1, ctrl->comm);
            }
            else {
                MPI_Recv((void *)buffer, 2, MPI_FLOAT, gd_pe, 1, ctrl->comm, &status);
                your_cost = buffer[0];
                your_balance = buffer[1];
            }
        }

        if (ctrl->mype == sr_pe) {
            who_wins = gd_pe;
            if ((my_balance < 1.1 && your_balance > 1.1) ||
                    (my_balance < 1.1 && your_balance < 1.1 && my_cost < your_cost) ||
                    (my_balance > 1.1 && your_balance > 1.1 && my_balance < your_balance)) {
                who_wins = sr_pe;
            }
        }

        MPI_Bcast((void *)&who_wins, 1, MPI_INT, sr_pe, ctrl->comm);
    }
    else {
        who_wins = sr_pe;
    }

    MPI_Bcast((void *)part, nvtxs, IDX_DATATYPE, who_wins, ctrl->comm);
    idxcopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where);

    MPI_Comm_free(&ipcomm);
    GKfree((void **)&xadj, (void **)&mytpwgts, LTERM);

    /* For whatever reason, FreeGraph crashes here...so explicitly free the memory.
      FreeGraph(agraph);
    */
    GKfree((void **)&agraph->xadj, (void **)&agraph->adjncy, (void **)&agraph->vwgt, (void **)&agraph->nvwgt, LTERM);
    GKfree((void **)&agraph->vsize, (void **)&agraph->adjwgt, (void **)&agraph->label, LTERM);
    GKfree((void **)&agraph, LTERM);

    IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

}
Example #11
 void TreeCommunicator::comm_destroy(void)
 {
     for (auto comm_it = m_comm.begin(); comm_it < m_comm.end(); ++comm_it) {
         MPI_Comm_free(&(*comm_it));
     }
 }
Example #12
int main( int argc, char *argv[] )
{
    int wrank, wsize, rank, size, color;
    int j, tmp;
    int err, toterrs, errs = 0;
    MPI_Comm newcomm;

    MPI_Init( &argc, &argv );

    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
    MPI_Comm_rank( MPI_COMM_WORLD, &wrank );

    /* Color is 0 or 1; 1 will be the processes that "fault" */
    /* process 0 and wsize/2+1...wsize-1 are in non-faulting group */
    color = (wrank > 0) && (wrank <= wsize/2);
    MPI_Comm_split( MPI_COMM_WORLD, color, wrank, &newcomm );

    MPI_Comm_size( newcomm, &size );
    MPI_Comm_rank( newcomm, &rank );

    /* Set errors return on COMM_WORLD and the new comm */
    MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN );
    MPI_Comm_set_errhandler( newcomm, MPI_ERRORS_RETURN );

    err = MPI_Barrier( MPI_COMM_WORLD );
    if (err) errs += ReportErr( err, "Barrier" );
    if (color) {
	/* Simulate a fault on some processes */
	exit(1);
    }
    else {
	/* To improve the chance that the "faulted" processes will have
	   exited, wait for 1 second */
	sleep( 1 );
    }
    
    /* Can we still use newcomm? */
    for (j=0; j<rank; j++) {
	err = MPI_Recv( &tmp, 1, MPI_INT, j, 0, newcomm, MPI_STATUS_IGNORE );
	if (err) errs += ReportErr( err, "Recv" );
    }
    for (j=rank+1; j<size; j++) {
	err = MPI_Send( &rank, 1, MPI_INT, j, 0, newcomm );
	if (err) errs += ReportErr( err, "Recv" );
    }

    /* Now, try sending in MPI_COMM_WORLD on dead processes */
    /* There is a race condition here - we don't know for sure that the faulted
       processes have exited.  However, we can ensure a failure by using 
       synchronous sends - the sender will wait until the receiver 
       receives the message, which will not happen (the process will exit 
       without matching the message, even if it has not yet exited). */
    for (j=1; j<=wsize/2; j++) {
	err = MPI_Ssend( &rank, 1, MPI_INT, j, 0, MPI_COMM_WORLD );
	if (!err) {
	    errs++;
	    fprintf( stderr, "Ssend succeeded to dead process %d\n", j );
	}
    }

    err = MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, newcomm );
    if (err) errs += ReportErr( err, "Allreduce" );
    MPI_Comm_free( &newcomm );

    MPI_Finalize();

    if (wrank == 0) {
	if (toterrs > 0) {
	    printf( " Found %d errors\n", toterrs );
	}
	else {
	    printf( " No Errors\n" );
	}
    }

    return 0;
}
Example #13
SEXP Rhpc_gethandle(SEXP procs)
{
  int num_procs;
  MPI_Comm *ptr;
  SEXP com;
  int num;
  MPI_Comm pcomm;

  if (RHPC_Comm == MPI_COMM_NULL){
    error("Rhpc_initialize is not called.");
    return(R_NilValue);
  }
  if(finalize){
    warning("Rhpc were already finalized.");
    return(R_NilValue);
  }
  if(!initialize){
    warning("Rhpc not initialized.");
    return(R_NilValue);
  }
 
  num_procs = INTEGER (procs)[0];

  ptr = Calloc(1,MPI_Comm);
  PROTECT(com = R_MakeExternalPtr(ptr, R_NilValue, R_NilValue));
  R_RegisterCFinalizer(com, comm_free);
  SXP2COMM(com) = RHPC_Comm;

  if (num_procs == NA_INTEGER){/* use mpirun */
    _M(MPI_Comm_size(SXP2COMM(com), &num));
    Rprintf("Detected communication size %d\n", num);
    if( num > 1 ){
      if ( num_procs > 0){
	warning("blind procs argument, return of MPI_COMM_WORLD");
      }
    }else{
      if ( num == 1){
	warning("only current master process. not found worker process.");
      }
      SXP2COMM(com)=MPI_COMM_NULL;
      warning("please pecifies the number of processes in mpirun or mpiexec, or provide a number of process to spawn");
    }
    UNPROTECT(1);
    return(com);
  }else{ /* spawn */
    if(num_procs < 1){
      warning("you need positive number of procs argument");
      UNPROTECT(1);
      return(com);
    }
    _M(MPI_Comm_size(SXP2COMM(com), &num));
    if(num > 1){ 
      warning("blind procs argument, return of last communicator");
      UNPROTECT(1);
      return(com);
    }
  }

  _M(MPI_Comm_spawn(RHPC_WORKER_CMD, MPI_ARGV_NULL, num_procs,
		    MPI_INFO_NULL, 0, MPI_COMM_SELF, &pcomm,  
		    MPI_ERRCODES_IGNORE));
  _M(MPI_Intercomm_merge( pcomm, 0, SXP2COMMP(com)));
  _M(MPI_Comm_free( &pcomm ));
  _M(MPI_Comm_size(SXP2COMM(com), &num));
  RHPC_Comm = SXP2COMM(com); /* rewrite RHPC_Comm */
  _M(MPI_Comm_set_errhandler(RHPC_Comm, MPI_ERRORS_RETURN));
  _M(MPI_Comm_rank(RHPC_Comm, &MPI_rank));
  _M(MPI_Comm_size(RHPC_Comm, &MPI_procs));
  DPRINT("Rhpc_getHandle(MPI_Comm_spawn : rank:%d size:%d\n", MPI_rank, MPI_procs);
  Rhpc_set_options( MPI_rank, MPI_procs,RHPC_Comm);
  UNPROTECT(1);
  return(com);
}
Example #14
int main(int argc, char *argv[])
{
    int errs = 0;
    int rank, size, rsize;
    int nsize, nrank;
    int minsize = 2;
    int isLeft;
    MPI_Comm comm, comm1, comm2, comm3, comm4;

    MTest_Init(&argc, &argv);

    /* The following illustrates the use of the routines to
     * run through a selection of communicators and datatypes.
     * Use subsets of these for tests that do not involve combinations
     * of communicators, datatypes, and counts of datatypes */
    while (MTestGetIntercomm(&comm, &isLeft, minsize)) {
        if (comm == MPI_COMM_NULL)
            continue;
        /* Determine the sender and receiver */
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_remote_size(comm, &rsize);
        MPI_Comm_size(comm, &size);

        /* Try building intercomms */
        MPI_Intercomm_merge(comm, isLeft, &comm1);
        /* Check the size and ranks */
        MPI_Comm_size(comm1, &nsize);
        MPI_Comm_rank(comm1, &nrank);
        if (nsize != size + rsize) {
            errs++;
            printf("(1) Comm size is %d but should be %d\n", nsize, size + rsize);
            if (isLeft) {
                /* The left processes should be high */
                if (nrank != rsize + rank) {
                    errs++;
                    printf("(1) rank for high process is %d should be %d\n", nrank, rsize + rank);
                }
            }
            else {
                /* The right processes should be low */
                if (nrank != rank) {
                    errs++;
                    printf("(1) rank for low process is %d should be %d\n", nrank, rank);
                }
            }
        }

        MPI_Intercomm_merge(comm, !isLeft, &comm2);
        /* Check the size and ranks */
        MPI_Comm_size(comm1, &nsize);
        MPI_Comm_rank(comm1, &nrank);
        if (nsize != size + rsize) {
            errs++;
            printf("(2) Comm size is %d but should be %d\n", nsize, size + rsize);
            if (!isLeft) {
                /* The right processes should be high */
                if (nrank != rsize + rank) {
                    errs++;
                    printf("(2) rank for high process is %d should be %d\n", nrank, rsize + rank);
                }
            }
            else {
                /* The left processes should be low */
                if (nrank != rank) {
                    errs++;
                    printf("(2) rank for low process is %d should be %d\n", nrank, rank);
                }
            }
        }


        MPI_Intercomm_merge(comm, 0, &comm3);

        MPI_Intercomm_merge(comm, 1, &comm4);

        MPI_Comm_free(&comm1);
        MPI_Comm_free(&comm2);
        MPI_Comm_free(&comm3);
        MPI_Comm_free(&comm4);

        MTestFreeComm(&comm);
    }

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
Example #15
void nrnmpi_subworld_size(int n) {
	/* n is the size of a subworld, nrnmpi_numprocs (pc.nhost) */
	if (nrnmpi_use != 1) { return; }
	if (nrnmpi_comm != MPI_COMM_NULL) {asrt(MPI_Comm_free(&nrnmpi_comm)); }
	if (nrn_bbs_comm != MPI_COMM_NULL) {asrt(MPI_Comm_free(&nrn_bbs_comm)); }
	if (grp_bbs != MPI_GROUP_NULL) { asrt(MPI_Group_free(&grp_bbs)); }
	if (grp_net != MPI_GROUP_NULL) { asrt(MPI_Group_free(&grp_net)); }
	MPI_Group wg;
	asrt(MPI_Comm_group(nrnmpi_world_comm, &wg));
	int r = nrnmpi_myid_world;
	/* special cases */
	if (n == 1) {
		asrt(MPI_Group_incl(wg, 1, &r, &grp_net));
		asrt(MPI_Comm_dup(nrnmpi_world_comm, &nrn_bbs_comm));
		asrt(MPI_Comm_create(nrnmpi_world_comm, grp_net, &nrnmpi_comm));
		asrt(MPI_Comm_rank(nrnmpi_comm, &nrnmpi_myid));
		asrt(MPI_Comm_size(nrnmpi_comm, &nrnmpi_numprocs));
		asrt(MPI_Comm_rank(nrn_bbs_comm, &nrnmpi_myid_bbs));
		asrt(MPI_Comm_size(nrn_bbs_comm, &nrnmpi_numprocs_bbs));
	}else if (n == nrnmpi_numprocs_world) {
		asrt(MPI_Group_incl(wg, 1, &r, &grp_bbs));
		asrt(MPI_Comm_dup(nrnmpi_world_comm, &nrnmpi_comm));
		asrt(MPI_Comm_create(nrnmpi_world_comm, grp_bbs, &nrn_bbs_comm));
		asrt(MPI_Comm_rank(nrnmpi_comm, &nrnmpi_myid));
		asrt(MPI_Comm_size(nrnmpi_comm, &nrnmpi_numprocs));
		if (r == 0) {
			asrt(MPI_Comm_rank(nrn_bbs_comm, &nrnmpi_myid_bbs));
			asrt(MPI_Comm_size(nrn_bbs_comm, &nrnmpi_numprocs_bbs));
		}else{
			nrnmpi_myid_bbs = -1;
			nrnmpi_numprocs_bbs = -1;
		}
	}else{
		int nw = nrnmpi_numprocs_world;
		int nb = nw/n; /* nrnmpi_numprocs_bbs */
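		/* Worked example (illustrative): with nrnmpi_numprocs_world == 8 and
		 * n == 4, rank r == 5 gets grp_net = ranks {4,5,6,7} (range 4..7,
		 * stride 1) and grp_bbs = ranks {0,4} (range 0..4, stride 4), i.e. the
		 * rank-0 member of each subworld forms nrn_bbs_comm. */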
		int range[3];
		/* net is contiguous */
		range[0] = r/n;
		range[0] *= n; /* first */
		range[1] = range[0] + n - 1; /* last */
		range[2] = 1; /* stride */
		asrt(MPI_Group_range_incl(wg, 1, &range, &grp_net));
		asrt(MPI_Comm_create(nrnmpi_world_comm, grp_net, &nrnmpi_comm));
		asrt(MPI_Comm_rank(nrnmpi_comm, &nrnmpi_myid));
		asrt(MPI_Comm_size(nrnmpi_comm, &nrnmpi_numprocs));

		range[0] = 0; /* first */
		range[1] = nw - n; /* last */
		range[2] = n; /* stride */
		asrt(MPI_Group_range_incl(wg, 1, &range, &grp_bbs));
		asrt(MPI_Comm_create(nrnmpi_world_comm, grp_bbs, &nrn_bbs_comm));
		if (r%n == 0) { /* only rank 0 of the subworlds */
			asrt(MPI_Comm_rank(nrn_bbs_comm, &nrnmpi_myid_bbs));
			asrt(MPI_Comm_size(nrn_bbs_comm, &nrnmpi_numprocs_bbs));
		}else{
			nrnmpi_myid_bbs = -1;
			nrnmpi_numprocs_bbs = -1;
		}
	}
	asrt(MPI_Group_free(&wg));
}
Example #16
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Status status;
  MPI_Comm comm;
  int drank, dnprocs;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 3) {
    printf ("not enough tasks\n");
  }
  else {
    MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &comm);
    
    if (comm != MPI_COMM_NULL) {
      MPI_Comm_size (comm, &dnprocs);
      MPI_Comm_rank (comm, &drank);

      if (dnprocs > 1) {
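	/* Note: both branches below post a blocking MPI_Recv before the matching
	   MPI_Send, so neither send is ever reached and the exchange deadlocks;
	   this head-to-head receive pattern appears to be intentional, as in
	   deadlock-detection test suites. */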
	if (drank == 0) {
	  memset (buf0, 0, buf_size);

	  MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, comm, &status);
	
	  MPI_Send (buf0, buf_size, MPI_INT, 1, 0, comm);
	}
	else if (drank == 1) {
	  memset (buf1, 1, buf_size);

	  MPI_Recv (buf0, buf_size, MPI_INT, 0, 0, comm, &status);

	  MPI_Send (buf1, buf_size, MPI_INT, 0, 0, comm);
	}
      }
      else {
	printf ("(%d) Derived communicator too small (size = %d)\n",
		rank, dnprocs);
      }

      MPI_Comm_free (&comm);
    }
    else {
      printf ("(%d) Got MPI_COMM_NULL\n", rank);
    }
  }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
  return 0;
}
Ejemplo n.º 17
0
int main( int argc, char *argv[] )
{
    int errs = 0;
    int rank, size, source, dest, i;
    MPI_Comm      comm;
    MPI_Comm      tmpComm[NCOMM];
    MPI_Status    status;
    MPI_Request   req;
    int           *buf=0;

    MTest_Init( &argc, &argv );

    MPI_Comm_dup( MPI_COMM_WORLD, &comm );

    /* This is similar to the datatype test, except that we post
       an irecv on a simple data buffer but use a rank-reordered communicator.
       In this case, an error in handling the reference count will most 
       likely cause the program to hang, so this should be run only
       if (a) you are confident that the code is correct or (b) 
       a timeout is set for mpiexec 
    */

    MPI_Comm_rank( comm, &rank );
    MPI_Comm_size( comm, &size );

    if (size < 2) {
	fprintf( stderr, "This test requires at least two processes.\n" );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }

    source  = 0;
    dest    = size - 1;

    if (rank == dest) {
	buf = (int *)malloc( NELM * sizeof(int) );
	for (i=0; i<NELM; i++) buf[i] = -i;
	MPI_Irecv( buf, NELM, MPI_INT, source, 0, comm, &req );
	MPI_Comm_free( &comm );

	if (comm != MPI_COMM_NULL) {
	    errs++;
	    printf( "Freed comm was not set to COMM_NULL\n" );
	}

	for (i=0; i<NCOMM; i++) {
	    MPI_Comm_split( MPI_COMM_WORLD, 0, size - rank, &tmpComm[i] );
	}

	MPI_Sendrecv( NULL, 0, MPI_INT, source, 1,
		      NULL, 0, MPI_INT, source, 1, MPI_COMM_WORLD, &status );

	MPI_Wait( &req, &status );
	for (i=0; i<NELM; i++) {
	    if (buf[i] != i) {
		errs++;
		if (errs < 10) {
		    printf( "buf[%d] = %d, expected %d\n", i, buf[i], i );
		}
	    }
	}
	for (i=0; i<NCOMM; i++) {
	    MPI_Comm_free( &tmpComm[i] );
	}
	free( buf );
    }
    else if (rank == source) {
	buf = (int *)malloc( NELM * sizeof(int) );
	for (i=0; i<NELM; i++) buf[i] = i;

	for (i=0; i<NCOMM; i++) {
	    MPI_Comm_split( MPI_COMM_WORLD, 0, size - rank, &tmpComm[i] );
	}
	/* Synchronize with the receiver */
	MPI_Sendrecv( NULL, 0, MPI_INT, dest, 1,
		      NULL, 0, MPI_INT, dest, 1, MPI_COMM_WORLD, &status );
	MPI_Send( buf, NELM, MPI_INT, dest, 0, comm );
	free( buf );
    }
    else {
	for (i=0; i<NCOMM; i++) {
	    MPI_Comm_split( MPI_COMM_WORLD, 0, size - rank, &tmpComm[i] );
	}
    }

    MPI_Barrier( MPI_COMM_WORLD );

    if (rank != dest) {
	/* Clean up the communicators */
	for (i=0; i<NCOMM; i++) {
	    MPI_Comm_free( &tmpComm[i] );
	}
    }
    if (comm != MPI_COMM_NULL) {
	MPI_Comm_free( &comm );
    }
    
    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
Ejemplo n.º 18
0
int main(int argc, char **argv) {
    int      i, j, rank, nproc;
    int      shm_rank, shm_nproc;
    MPI_Info alloc_shared_info;
    int      errors = 0, all_errors = 0;
    int      disp_unit;
    int     *my_base;
    MPI_Win  shm_win;
    MPI_Comm shm_comm;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Info_create(&alloc_shared_info);
    MPI_Info_set(alloc_shared_info, "alloc_shared_noncontig", "true");

#ifdef TEST_MPI3_ROUTINES

    MPIX_Comm_split_type(MPI_COMM_WORLD, MPIX_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);

    MPI_Comm_rank(shm_comm, &shm_rank);
    MPI_Comm_size(shm_comm, &shm_nproc);

    /* Allocate ELEM_PER_PROC integers for each process */
    MPIX_Win_allocate_shared(sizeof(int)*ELEM_PER_PROC, sizeof(int), alloc_shared_info, 
                             shm_comm, &my_base, &shm_win);

    MPIX_Win_lock_all(MPI_MODE_NOCHECK, shm_win);

    /* Write to all my data */
    for (i = 0; i < ELEM_PER_PROC; i++) {
        my_base[i] = i;
    }

    MPIX_Win_sync(shm_win);
    MPI_Barrier(shm_comm);
    MPIX_Win_sync(shm_win);

    /* Read and verify everyone's data */
    for (i = 0; i < shm_nproc; i++) {
        int      *base;
        MPI_Aint  size;

        MPIX_Win_shared_query(shm_win, i, &size, &disp_unit, &base);
        assert(size == ELEM_PER_PROC * sizeof(int));

        for (j = 0; j < ELEM_PER_PROC; j++) {
            if ( base[j] != j ) {
                errors++;
                printf("%d -- Got %d at rank %d index %d, expected %d\n", shm_rank, 
                       base[j], i, j, j);
            }
        }
    }

    MPIX_Win_unlock_all(shm_win);
    MPI_Win_free(&shm_win);
    MPI_Comm_free(&shm_comm);

#endif /* TEST_MPI3_ROUTINES */

    MPI_Info_free(&alloc_shared_info);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();

    return 0;
}
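/* The test above uses the pre-standard MPIX_ entry points from early MPICH
 * previews of MPI-3.  Below is a minimal sketch of the same shared-memory
 * window pattern with the standardized MPI-3 names; the buffer size and the
 * function name are illustrative and not taken from the test above. */
#include <mpi.h>
#include <assert.h>

#define NELEM 16

static void shared_window_demo(void)
{
    MPI_Comm  shm_comm;
    MPI_Win   win;
    MPI_Aint  size;
    int      *base, *peer, disp_unit, i;

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                        MPI_INFO_NULL, &shm_comm);
    MPI_Win_allocate_shared(NELEM * sizeof(int), sizeof(int),
                            MPI_INFO_NULL, shm_comm, &base, &win);

    MPI_Win_lock_all(MPI_MODE_NOCHECK, win);
    for (i = 0; i < NELEM; i++)
        base[i] = i;                /* write my own segment */
    MPI_Win_sync(win);              /* make my writes visible */
    MPI_Barrier(shm_comm);          /* wait for the other ranks */
    MPI_Win_sync(win);

    /* read rank 0's segment directly through the shared mapping */
    MPI_Win_shared_query(win, 0, &size, &disp_unit, &peer);
    assert(disp_unit == sizeof(int) && peer[0] == 0);

    MPI_Win_unlock_all(win);
    MPI_Win_free(&win);
    MPI_Comm_free(&shm_comm);
}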
Ejemplo n.º 19
0
PetscErrorCode ISPairToList(IS xis, IS yis, PetscInt *listlen, IS **islist)
{
  PetscErrorCode ierr;
  IS             indis = xis, coloris = yis;
  PetscInt       *inds, *colors, llen, ilen, lstart, lend, lcount,l;
  PetscMPIInt    rank, size, llow, lhigh, low, high,color,subsize;
  const PetscInt *ccolors, *cinds;
  MPI_Comm       comm, subcomm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(xis, IS_CLASSID, 1);
  PetscValidHeaderSpecific(yis, IS_CLASSID, 2);
  PetscCheckSameComm(xis,1,yis,2);
  PetscValidIntPointer(listlen,3);
  PetscValidPointer(islist,4);
  ierr = PetscObjectGetComm((PetscObject)xis,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  /* Extract, copy and sort the local indices and colors on the color. */
  ierr = ISGetLocalSize(coloris, &llen);CHKERRQ(ierr);
  ierr = ISGetLocalSize(indis,   &ilen);CHKERRQ(ierr);
  if (llen != ilen) SETERRQ2(comm, PETSC_ERR_ARG_SIZ, "Incompatible IS sizes: %D and %D", ilen, llen);
  ierr = ISGetIndices(coloris, &ccolors);CHKERRQ(ierr);
  ierr = ISGetIndices(indis, &cinds);CHKERRQ(ierr);
  ierr = PetscMalloc2(ilen,&inds,llen,&colors);CHKERRQ(ierr);
  ierr = PetscMemcpy(inds,cinds,ilen*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(colors,ccolors,llen*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(llen, colors, inds);CHKERRQ(ierr);
  /* Determine the global extent of colors. */
  llow   = 0; lhigh  = -1;
  lstart = 0; lcount = 0;
  while (lstart < llen) {
    lend = lstart+1;
    while (lend < llen && colors[lend] == colors[lstart]) ++lend;
    llow  = PetscMin(llow,colors[lstart]);
    lhigh = PetscMax(lhigh,colors[lstart]);
    ++lcount;
    lstart = lend; /* advance to the next color segment (otherwise this loop never terminates) */
  }
  ierr     = MPI_Allreduce(&llow,&low,1,MPI_INT,MPI_MIN,comm);CHKERRQ(ierr);
  ierr     = MPI_Allreduce(&lhigh,&high,1,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  *listlen = 0;
  if (low <= high) {
    if (lcount > 0) {
      *listlen = lcount;
      if (!*islist) {
        ierr = PetscMalloc(sizeof(IS)*lcount, islist);CHKERRQ(ierr);
      }
    }
    /*
     Traverse all possible global colors, and participate in the subcommunicators
     for the locally-supported colors.
     */
    lcount = 0;
    lstart = 0; lend = 0;
    for (l = low; l <= high; ++l) {
      /*
       Find the range of indices with the same color, which is not smaller than l.
       Observe that, since colors is sorted, and is a subsequence of [low,high],
       as soon as we find a new color, it is >= l.
       */
      if (lstart < llen) {
        /* The start of the next locally-owned color is identified.  Now look for the end. */
        if (lstart == lend) {
          lend = lstart+1;
          while (lend < llen && colors[lend] == colors[lstart]) ++lend;
        }
        /* Now check whether the identified color segment matches l. */
        if (colors[lstart] < l) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Locally owned color %D at location %D is < than the next global color %D", colors[lstart], lcount, l);
      }
      color = (PetscMPIInt)(lstart < llen && colors[lstart] == l);
      /* Check whether a proper subcommunicator exists. */
      ierr = MPI_Allreduce(&color,&subsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);

      if (subsize == 1) subcomm = PETSC_COMM_SELF;
      else if (subsize == size) subcomm = comm;
      else {
        /* a proper communicator is necessary, so we create it. */
        ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
      }
      if (lstart < llen && colors[lstart] == l) {
        /* If we have l among the local colors, we create an IS to hold the corresponding indices. */
        ierr = ISCreateGeneral(subcomm, lend-lstart,inds+lstart,PETSC_COPY_VALUES,*islist+lcount);CHKERRQ(ierr);
        /* Position lstart at the beginning of the next local color. */
        lstart = lend;
        /* Increment the counter of the local colors split off into an IS. */
        ++lcount;
      }
      if (subsize > 0 && subsize < size) {
        /*
         Irrespective of color, destroy the split off subcomm:
         a subcomm used in the IS creation above is duplicated
         into a proper PETSc comm.
         */
        ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
      }
    } /* for (l = low; l <= high; ++l) */
  } /* if (low <= high) */
  ierr = PetscFree2(inds,colors);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Ejemplo n.º 20
0
int main(int argc, char *argv[])
{
	double t1,t2,duration;
    
    if (argc > 1){
        Size = atoi(argv[argc-1]);
    }
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);//Number of processes
	MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);//Rank of process
	t1 = MPI_Wtime();
	MPI_Group WorldGroup, CalculatorGroup;
	MPI_Comm Calculators;
	int ranks[1];
	ranks[0] = ProcNum-1;
	MPI_Comm_group(MPI_COMM_WORLD, &WorldGroup);
	MPI_Group_excl(WorldGroup, 1, ranks, &CalculatorGroup);
	MPI_Comm_create(MPI_COMM_WORLD,CalculatorGroup,&Calculators);
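	/* Ranks not in CalculatorGroup (here only rank ProcNum-1, which does the
	   file I/O below) receive MPI_COMM_NULL in Calculators. */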

	
	int GridSize=sqrt((double)(ProcNum-1));//size of virtual topology(Grid)
	N=Size/GridSize+2;//N-size of subsystem; +2 - for boundary condition
    assert(GridSize*GridSize+1 == ProcNum);
	

	if (ProcRank!=ProcNum-1)
	{
		
		MPI_Datatype column;
		MPI_Type_vector(N,1,N,MPI_DOUBLE,&column);
		MPI_Type_commit(&column);

		//creating datatype for square
		MPI_Datatype square;
		MPI_Type_vector(N-2,N-2,N,MPI_DOUBLE,&square);
		MPI_Type_commit(&square);

		MPI_Comm GridComm;
		CreateGrid(GridSize, &GridComm, &Calculators);
		//Latt_Init1(N,"200last_state_gk_d0p1.bin");
		LattInit(N);
        SolveEquations(250, GridComm, GridSize, column, square);
		MPI_Type_free(&square);
		MPI_Type_free(&column);
		MPI_Group_free(&CalculatorGroup);
		MPI_Comm_free(&Calculators);
		delete[] V;
		delete[] Cell;
		delete[] I_ext;
	}
	else
	{
        convert_buf = new short[(N-2)*(N-2)*ProcNum];
		double* V_all = new double[(N-2)*(N-2)*ProcNum];
		double* V_temp = new double[(N-2)*(N-2)];
		short* V_save = new short[(N-2)*(N-2)*ProcNum];
		int ii;
		//printf("Total number of frames: %i\n", DrawNum);
#ifdef OS_WINDOWS
		int fd = open("200snapshots_gk_d0p1.bin",O_RDWR|O_CREAT | O_BINARY,S_IREAD|S_IWRITE);
#else
        int fd = open("snapshots.bin",O_RDWR|O_CREAT ,S_IREAD|S_IWRITE);
#endif
        for (int i=0;i<DrawNum*2;i++)
		{
			
			MPI_Gather(V_temp,(N-2)*(N-2),MPI_DOUBLE,V_all,(N-2)*(N-2),MPI_DOUBLE,ProcNum-1,MPI_COMM_WORLD);
			for (ii=0;ii<(N-2)*(N-2)*(ProcNum-1);ii++)
			{
				V_save[ii]=short(V_all[ii]*250.);
			}
            convertRst(V_save,convert_buf,Size,GridSize);
            save(convert_buf,Size*Size,fd);
        }
		close(fd);

		MPI_Gather(V_temp,(N-2)*(N-2),MPI_DOUBLE,V_all,(N-2)*(N-2),MPI_DOUBLE,ProcNum-1,MPI_COMM_WORLD);
		for (ii=0;ii<(N-2)*(N-2)*(ProcNum-1);ii++)
		{
			V_save[ii]=short(V_all[ii]*250.);
		}
#ifdef OS_WINDOWS
		fd = open("200last_V_gk_d0p1.bin",O_RDWR|O_CREAT | O_BINARY,S_IREAD|S_IWRITE);
#else
        fd = open("lastV.bin",O_RDWR|O_CREAT,S_IREAD|S_IWRITE);
#endif
		save(V_save,(N-2)*(N-2)*(ProcNum-1),fd);
		close(fd);

#ifdef OS_WINDOWS
		fd = open("200last_state_gk_d0p1.bin",O_RDWR|O_CREAT | O_BINARY,S_IREAD|S_IWRITE);
#else
        fd = open("state.bin",O_RDWR|O_CREAT ,S_IREAD|S_IWRITE);
#endif
		for (int i=0; i<9; i++)
		{
		MPI_Gather(V_temp,(N-2)*(N-2),MPI_DOUBLE,V_all,(N-2)*(N-2),MPI_DOUBLE,ProcNum-1,MPI_COMM_WORLD);
		save_double(V_all,(N-2)*(N-2)*(ProcNum-1),fd);
		}
		close(fd);



		delete[] V_all;
		delete[] V_temp;
		delete[] V_save;
        delete[] convert_buf;
	}



	
	
	t2 = MPI_Wtime();
	if (ProcRank==0)
	{
		printf("Experiment duration: %f seconds\n",t2-t1);
	}
	MPI_Finalize();
	
	return 0;
}
Ejemplo n.º 21
0
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Status status;
  MPI_Comm temp, intercomm;
  int trank, tnprocs;
  int drank, dnprocs, rleader, rnprocs;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 3) {
    printf ("not enough tasks\n");
  }
  else {
    /* need to make split communicator temporarily... */
    MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &temp);

    if (temp != MPI_COMM_NULL) {
      MPI_Comm_size (temp, &tnprocs);
      MPI_Comm_rank (temp, &trank);

      /* create an intercommunicator temporarily so can merge it... */
      rleader = ((rank + nprocs) % 2) ?  nprocs - 2 : nprocs - 1;
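      /* Note: this receive on MPI_COMM_WORLD has no matching send in the
         program (the later send uses the "temp" communicator), so the ranks
         that post it block forever and the collective MPI_Intercomm_create
         below never completes; the hang appears to be intentional, as in
         deadlock-detection test suites. */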

      if ((trank == 0) && (rank % 2)) {
	MPI_Recv (buf0, buf_size, MPI_INT,
		  rleader, 0, MPI_COMM_WORLD, &status);
      }

      MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD, rleader,
			    INTERCOMM_CREATE_TAG, &intercomm);

      if ((trank == 0) && !(rank % 2)) {
	memset (buf0, 0, buf_size);
	
	MPI_Send (buf0, buf_size, MPI_INT, 1, 0, temp);
      }
      else {
	printf ("(%d) Split communicator too small\n", rank);
      }

      MPI_Comm_free (&temp);

      if (intercomm != MPI_COMM_NULL) {
	MPI_Comm_size (intercomm, &dnprocs);
	MPI_Comm_rank (intercomm, &drank);
 	MPI_Comm_remote_size (intercomm, &rnprocs);

	if (rnprocs > drank) {
	  if (rank % 2) {
	    memset (buf1, 1, buf_size);

	    MPI_Recv (buf0, buf_size, MPI_INT, drank, 0, intercomm, &status);

	    MPI_Send (buf1, buf_size, MPI_INT, drank, 0, intercomm);
	  }
	  else {
	    memset (buf0, 0, buf_size);
	
	    MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm);
	
	    MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status);
	  }
	}
	else {
	  printf ("(%d) Intercomm too small (lrank = %d; remote size = %d)\n",
		  rank, drank, rnprocs);
	}

	MPI_Comm_free (&intercomm);
      }
      else {
	printf ("(%d) Got MPI_COMM_NULL\n", rank);
      }
    }
    else {
      printf ("(%d) MPI_Comm_split got MPI_COMM_NULL\n", rank);
    }
  }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
  return 0;
}
Ejemplo n.º 22
0
void DWorldDestroy( DWorld world ){
	MPI_Comm_free(&world->comm);
	free(world->rPack);
	free(world->sPack);
	free(world);
}
Ejemplo n.º 23
0
int main(int argc, char **argv) {

	int rankLeft[4] = {0, 1, 2, 3}, rankRight[4] = {4, 5, 6, 7};
	int i, result;
	char outStr[600];

	int nProcs, myRank;
	MPI_Group grpWorld, grpNew;
	MPI_Comm commNew;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
	MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
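	/* Note: rankLeft/rankRight are hard-coded for 8 ranks, so this example
	   assumes it is launched with exactly 8 processes. */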

	MPI_Comm_group(MPI_COMM_WORLD, &grpWorld);
	if (myRank < nProcs / 2) {
		MPI_Group_incl(grpWorld, nProcs / 2, rankLeft, &grpNew);
	} else {
		MPI_Group_incl(grpWorld, nProcs / 2, rankRight, &grpNew);
	}
	MPI_Comm_create(MPI_COMM_WORLD, grpNew, &commNew);

	int myRankCommNew, nProcsCommNew;
	int myRankGrpNew, nProcsGrpNew;

	MPI_Comm_rank(commNew, &myRankCommNew);
	MPI_Comm_size(commNew, &nProcsCommNew);
	MPI_Group_rank(grpNew, &myRankGrpNew);
	MPI_Group_size(grpNew, &nProcsGrpNew);

	fprintf(stdout, "WorldRank: %d in %d, NewCommRank: %d in %d, NewGrpRank: %d in %d\n",
		myRank, nProcs, myRankCommNew, nProcsCommNew, myRankGrpNew, nProcsGrpNew);

	MPI_Barrier(MPI_COMM_WORLD);

	int sendBuf = myRank, recvBuf;

	MPI_Allreduce(&sendBuf, &recvBuf, 1, MPI_INT, MPI_SUM, commNew);

	fprintf(stdout, "WorldRank = %d, sendBuf = %d, recvBuf = %d\n", myRank, sendBuf, recvBuf);
	fflush(stdout);

	MPI_Barrier(MPI_COMM_WORLD);

	int ranks1[8] = {0, 1, 2, 3, 4, 5, 6, 7}, ranks2[8];

	MPI_Group_compare(grpWorld, grpNew, &result);
	MPI_Group_translate_ranks(grpWorld, nProcs, ranks1, grpNew, ranks2);
	
	if (myRank == 0) {
		fprintf(stdout, "result = %d\n", result);
	}
	{
		int len = snprintf(outStr, sizeof(outStr), "rank %d: ", myRank);
		for (i = 0; i < nProcs; i++) {
			len += snprintf(outStr + len, sizeof(outStr) - len, "%d = %d ", ranks1[i], ranks2[i]);
		}
	}
	fprintf(stdout, "%s\n", outStr);


	MPI_Comm_free(&commNew);
	MPI_Group_free(&grpNew);
	MPI_Group_free(&grpWorld);

	MPI_Finalize();
	return 0;
}
Ejemplo n.º 24
0
int main(int argc, char** argv)
{
  int i, j, N, flag;
  Matrix A=NULL, Q=NULL;
  Vector b, grid, e, lambda=NULL;
  double time, sum, h, tol=1e-6;
  int rank, size;
  int mpi_top_coords;
  int mpi_top_sizes;

  init_app(argc, argv, &rank, &size);

  if (argc < 3) {
    printf("need two parameters, N and flag [and tolerance]\n");
    printf(" - N is the problem size (in each direction\n");
    printf(" - flag = 1  -> Matrix-free Gauss-Jacobi iterations\n");
    printf(" - flag = 2  -> Matrix-free red-black Gauss-Seidel iterations\n");
    printf(" - flag = 3  -> Matrix-free CG iterations\n");
    printf(" - flag = 4  -> Matrix-free additive schwarz preconditioned+Cholesky CG iterations\n");
    printf(" - flag = 5  -> Matrix-free additive schwarz preconditioned+CG CG iterations\n");
    return 1;
  }
  N=atoi(argv[1]);
  flag=atoi(argv[2]);
  if (argc > 3)
    tol=atof(argv[3]);

  if (N < 0) {
    if (rank == 0)
      printf("invalid problem size given\n");
    close_app();
    return 2;
  }

  if (flag < 0 || flag > 5) {
    if (rank == 0)
      printf("invalid flag given\n");
    close_app();
    return 3;
  }

  if (flag == 2 && (N-1)%2 != 0 && ((N-1)/size) % 2 != 0) {
    if (rank == 0)
      printf("need an even size (per process) for red-black iterations\n");
    close_app();
    return 4;
  }

  // setup topology
  mpi_top_coords = 0;
  mpi_top_sizes = 0;
  MPI_Dims_create(size, 1, &mpi_top_sizes);
  int periodic = 0;
  MPI_Comm comm;
  MPI_Cart_create(MPI_COMM_WORLD, 1, &mpi_top_sizes, &periodic, 0, &comm);
  MPI_Cart_coords(comm, rank, 1, &mpi_top_coords);

  b = createVectorMPI(N+1, &comm, 1, 1);
  e = createVectorMPI(N+1, &comm, 1, 1);

  grid = equidistantMesh(0.0, 1.0, N);
  h = 1.0/N;

  evalMeshDispl(b, grid, source);
  scaleVector(b, pow(h, 2));
  evalMeshDispl(e, grid, exact);
  axpy(b, e, alpha);
  b->data[0] = b->data[b->len-1] = 0.0;

  if (flag == 4) {
    int size = b->len;
    if (b->comm_rank == 0)
      size--;
    if (b->comm_rank == b->comm_size-1)
      size--;
    A1D = createMatrix(size, size);
    A1Dfactored = 0;
    diag(A1D, -1, -1.0);
    diag(A1D, 0, 2.0+alpha);
    diag(A1D, 1, -1.0);
  }

  int its=-1;
  char method[128];
  time = WallTime();
  if (flag == 1) {
    its=GaussJacobiPoisson1D(b, tol, 1000000);
    sprintf(method,"Gauss-Jacobi");
  }
  if (flag == 2) {
    its=GaussSeidelPoisson1Drb(b, tol, 1000000);
    sprintf(method,"Gauss-Seidel");
  }
  if (flag == 3) {
    its=cgMatrixFree(Poisson1D, b, tol);
    sprintf(method,"CG");
  }
  if (flag == 4 || flag == 5) {
    its=pcgMatrixFree(Poisson1D, Poisson1DPre, b, tol);
    sprintf(method,"PCG");
  }
  if (rank == 0) {
    printf("%s used %i iterations\n", method, its);
    printf("elapsed: %f\n", WallTime()-time);
  }

  evalMeshDispl(e, grid, exact);
  axpy(b,e,-1.0);
  b->data[0] = b->data[b->len-1] = 0.0;

  h = maxNorm(b);
  if (rank == 0)
    printf("max error: %e\n", h);
  
  if (A)
    freeMatrix(A);
  if (Q)
    freeMatrix(Q);
  freeVector(grid);
  freeVector(b);
  freeVector(e);
  if (lambda)
    freeVector(lambda);
  if (A1D)
    freeMatrix(A1D);

  MPI_Comm_free(&comm);

  close_app();
  return 0;
}
Ejemplo n.º 25
0
//=============================================================================
int Amesos_Mumps::SymbolicFactorization()
{

  // erase data if present. 
  if (IsSymbolicFactorizationOK_ && MDS.job != -777)
   Destroy();

  IsSymbolicFactorizationOK_ = false;
  IsNumericFactorizationOK_ = false;

  CreateTimer(Comm());
  
  CheckParameters();
  AMESOS_CHK_ERR(ConvertToTriplet(false));

#if defined(HAVE_MPI) && defined(HAVE_AMESOS_MPI_C2F)
  if (MaxProcs_ != Comm().NumProc()) 
  {
    if(MUMPSComm_) 
      MPI_Comm_free(&MUMPSComm_);

    std::vector<int> ProcsInGroup(MaxProcs_);
    for (int i = 0 ; i < MaxProcs_ ; ++i) 
      ProcsInGroup[i] = i;

    MPI_Group OrigGroup, MumpsGroup;
    MPI_Comm_group(MPI_COMM_WORLD, &OrigGroup);
    MPI_Group_incl(OrigGroup, MaxProcs_, &ProcsInGroup[0], &MumpsGroup);
    MPI_Comm_create(MPI_COMM_WORLD, MumpsGroup, &MUMPSComm_);

#ifdef MUMPS_4_9
    MDS.comm_fortran = (MUMPS_INT) MPI_Comm_c2f( MUMPSComm_);
#else

#ifndef HAVE_AMESOS_OLD_MUMPS
    MDS.comm_fortran = (DMUMPS_INT) MPI_Comm_c2f( MUMPSComm_);
#else
    MDS.comm_fortran = (F_INT) MPI_Comm_c2f( MUMPSComm_);
#endif

#endif

  } 
  else 
  {
    const Epetra_MpiComm* MpiComm = dynamic_cast<const Epetra_MpiComm*>(&Comm());
    assert (MpiComm != 0);
#ifdef MUMPS_4_9
    MDS.comm_fortran = (MUMPS_INT) MPI_Comm_c2f(MpiComm->GetMpiComm());
#else

#ifndef HAVE_AMESOS_OLD_MUMPS
    MDS.comm_fortran = (DMUMPS_INT) MPI_Comm_c2f(MpiComm->GetMpiComm());
#else
    MDS.comm_fortran = (F_INT) MPI_Comm_c2f(MpiComm->GetMpiComm());
#endif

#endif
  }
#else
  // The next three lines of code were required to make Amesos_Mumps work
  // with Ifpack_SubdomainFilter. They are useful in all cases
  // when using MUMPS on a subdomain.
  const Epetra_MpiComm* MpiComm = dynamic_cast<const Epetra_MpiComm*>(&Comm());
  assert (MpiComm != 0);
  MDS.comm_fortran = (MUMPS_INT) MPI_Comm_c2f(MpiComm->GetMpiComm());
  // only thing I can do, use MPI_COMM_WORLD. This will work in serial as well
  // Previously, the next line was uncommented, but we don't want MUMPS to work
  // on the global MPI comm, but on the comm associated with the matrix
  //  MDS.comm_fortran = -987654;
#endif
  
  MDS.job = -1  ;     //  Initialization
  MDS.par = 1 ;       //  Host IS involved in computations
//  MDS.sym = MatrixProperty_;
  MDS.sym =  0;       //  MatrixProperty_ is uninitialized.  Furthermore, MUMPS
                      //  expects only half of the matrix to be provided for
                      //  symmetric matrices.  Hence setting MDS.sym to be non-zero
                      //  indicating that the matrix is symmetric will only work
                      //  if we change ConvertToTriplet to pass only half of the 
                      //  matrix.  Bug #2331 and Bug #2332 - low priority


  RedistrMatrix(true);

  if (Comm().MyPID() < MaxProcs_) 
  {
    dmumps_c(&(MDS));   //  Initialize MUMPS
    static_cast<void>( CheckError( ) );  
  }

  MDS.n = Matrix().NumGlobalRows();

  // fix pointers for nonzero pattern of A. Numerical values
  // will be entered in PerformNumericalFactorization()
  if (Comm().NumProc() != 1) 
  {
    MDS.nz_loc = RedistrMatrix().NumMyNonzeros();

    if (Comm().MyPID() < MaxProcs_) 
    {
      MDS.irn_loc = &Row[0]; 
      MDS.jcn_loc = &Col[0];
    }
  } 
  else 
  {
    if (Comm().MyPID() == 0) 
    {
      MDS.nz = Matrix().NumMyNonzeros();
      MDS.irn = &Row[0]; 
      MDS.jcn = &Col[0]; 
    }
  }

  // scaling if provided by the user
  if (RowSca_ != 0) 
  {
    MDS.rowsca = RowSca_;
    MDS.colsca = ColSca_;
  }

  // given ordering if provided by the user
  if (PermIn_ != 0) 
  {
    MDS.perm_in = PermIn_;
  }

  MDS.job = 1;     // Request symbolic factorization

  SetICNTLandCNTL();

  // Perform symbolic factorization

  ResetTimer();

  if (Comm().MyPID() < MaxProcs_) 
    dmumps_c(&(MDS));

  SymFactTime_ = AddTime("Total symbolic factorization time", SymFactTime_);

  int IntWrong = CheckError()?1:0 ; 
  int AnyWrong;
  Comm().SumAll( &IntWrong, &AnyWrong, 1 ) ; 
  bool Wrong = AnyWrong > 0 ; 


  if ( Wrong ) {
      AMESOS_CHK_ERR( StructurallySingularMatrixError ) ; 
  }

  IsSymbolicFactorizationOK_ = true ;
  NumSymbolicFact_++;  

  return 0;
}
Ejemplo n.º 26
0
int main(int argc, char ** argv)
{
    int comm_size, comm_rank;
    MPI_Comm comm;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);

    if (LARGE_BUF * comm_size > MAX_BUF)
        goto fn_exit;

    SMPI_VARGET_GLOBAL(sbuf) = (void *) calloc(MAX_BUF, 1);
    SMPI_VARGET_GLOBAL(rbuf) = (void *) calloc(MAX_BUF, 1);

    srand(time(NULL));

    SMPI_VARGET_GLOBAL(recvcounts) = (void *) malloc(comm_size * sizeof(int));
    SMPI_VARGET_GLOBAL(displs) = (void *) malloc(comm_size * sizeof(int));
    if (!SMPI_VARGET_GLOBAL(recvcounts) || !SMPI_VARGET_GLOBAL(displs) || !SMPI_VARGET_GLOBAL(sbuf) || !SMPI_VARGET_GLOBAL(rbuf)) {
        fprintf(stderr, "Unable to allocate memory:\n");
	if (!SMPI_VARGET_GLOBAL(sbuf))
            fprintf(stderr,"\tsbuf of %d bytes\n", MAX_BUF );
	if (!SMPI_VARGET_GLOBAL(rbuf))
            fprintf(stderr,"\trbuf of %d bytes\n", MAX_BUF );
        if (!SMPI_VARGET_GLOBAL(recvcounts))
            fprintf(stderr,"\trecvcounts of %zu bytes\n", comm_size * sizeof(int));
        if (!SMPI_VARGET_GLOBAL(displs))
            fprintf(stderr,"\tdispls of %zu bytes\n", comm_size * sizeof(int));
        fflush(stderr);
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(-1);
    }

    if (!comm_rank) {
        dprintf("Message Range: (%d, %d); System size: %d\n", START_BUF, LARGE_BUF, comm_size);
        fflush(stdout);
    }


    /* COMM_WORLD tests */
    if (!comm_rank) {
        dprintf("\n\n==========================================================\n");
        dprintf("                         MPI_COMM_WORLD\n");
        dprintf("==========================================================\n");
    }
    comm_tests(MPI_COMM_WORLD);

    /* non-COMM_WORLD tests */
    if (!comm_rank) {
        dprintf("\n\n==========================================================\n");
        dprintf("                         non-COMM_WORLD\n");
        dprintf("==========================================================\n");
    }
    MPI_Comm_split(MPI_COMM_WORLD, (comm_rank == comm_size - 1) ? 0 : 1, 0, &comm);
    if (comm_rank < comm_size - 1)
        comm_tests(comm);
    MPI_Comm_free(&comm);

    /* Randomized communicator tests */
    if (!comm_rank) {
        dprintf("\n\n==========================================================\n");
        dprintf("                         Randomized Communicator\n");
        dprintf("==========================================================\n");
    }
    MPI_Comm_split(MPI_COMM_WORLD, 0, rand(), &comm);
    comm_tests(comm);
    MPI_Comm_free(&comm);

    //free(SMPI_VARGET_GLOBAL(sbuf));
    //free(SMPI_VARGET_GLOBAL(rbuf));
    free(SMPI_VARGET_GLOBAL(recvcounts));
    free(SMPI_VARGET_GLOBAL(displs));

fn_exit:
    MTest_Finalize(SMPI_VARGET_GLOBAL(errs));
    MPI_Finalize();

    return 0;
}
Ejemplo n.º 27
0
File: mpla.cpp Project: zaspel/MPLA
void mpla_redistribute_vector_for_dgesv(struct mpla_vector* b_redist, struct mpla_vector* b, struct mpla_matrix* A, struct mpla_instance* instance)
{
	// attention: this code does not check the input data for correctness



//	b_redist->vec_row_count = b->vec_row_count;
//
//	// allocating memory for process-wise vector information
//	vector->proc_row_count = new int*[instance->proc_rows];
//	vector->proc_row_offset = new int*[instance->proc_rows];
//	for (int i=0; i<instance->proc_rows; i++)
//	{
//		b_redist->proc_row_count[i] = new int[instance->proc_cols];
//		b_redist->proc_row_offset[i] = new int[instance->proc_cols];
//	}
//
//	// set sizes of 
//	for (int i=0; i<instance->proc_rows; i++)
//	{
//		for (int j=0; j<instance->proc_cols; j++)
//		{
//			b_redist->proc_row_count[i][j] = A->proc_col_count[i][j];
//			b_redist->proc_row_offset[i][j] = A->proc_col_offset[i][j];
//		}
//	}
//
//	// retrieving local data for current process
//	b_redist->cur_proc_row_count = A->cur_proc_col_count;
//	b_redist->cur_proc_row_offset = A->cur_proc_col_offset;
//
//	// allocating temporary vector storage
//	cudaMalloc((void*)&(b_redist->data), sizeof(double)*b_redist->cur_proc_row_count);

	// WARNING: The following code is not efficient at scale: it gathers the full vector on every process!


	// create sub-communicator for each process column
	int remain_dims[2];
	remain_dims[0]=1;
	remain_dims[1]=0;
	MPI_Comm column_comm;
	MPI_Cart_sub(instance->comm, remain_dims, &column_comm);
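	/* remain_dims = {1, 0}: dimension 0 is kept and dimension 1 is dropped,
	   so each resulting sub-communicator contains the processes of one
	   process column of the 2D grid. */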
	int column_rank;
	MPI_Comm_rank(column_comm, &column_rank);
	
	// columnwise creation of the full vector
	double* full_vector;
	int* recvcounts = new int[instance->proc_rows];
	int* displs = new int[instance->proc_rows];
	for (int i=0; i<instance->proc_rows; i++)
	{
		recvcounts[i] = b->proc_row_count[i][instance->cur_proc_col];
		displs[i] = b->proc_row_offset[i][instance->cur_proc_col];
	}
	cudaMalloc((void**)&full_vector, sizeof(double)*b->vec_row_count);
	cudaThreadSynchronize();
	checkCUDAError("cudaMalloc");
	MPI_Allgatherv(b->data, b->cur_proc_row_count, MPI_DOUBLE, full_vector, recvcounts, displs, MPI_DOUBLE, column_comm);

	// extract column-wise local part of full vector
	cudaMemcpy(b_redist->data, &(full_vector[b_redist->cur_proc_row_offset]), sizeof(double)*b_redist->cur_proc_row_count, cudaMemcpyDeviceToDevice);

	// memory cleanup
	cudaFree(full_vector);
	delete[] recvcounts;
	delete[] displs;

	MPI_Comm_free(&column_comm);
}
Ejemplo n.º 28
0
int main(int argc, char **argv){
  ptrdiff_t n[3], gc_below[3], gc_above[3];
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  ptrdiff_t local_ngc[3], local_gc_start[3];
  ptrdiff_t alloc_local, alloc_local_gc;
  int np[3], rnk_self, size, verbose;
  double err;
  MPI_Comm comm_cart_2d;
  pfft_complex *cdata;
  pfft_gcplan ths;
  
  MPI_Init(&argc, &argv);
  pfft_init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rnk_self);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  
  /* default values */
  n[0] = n[1] = n[2] = 8; /*  n[0] = 3; n[1] = 5; n[2] = 7;*/
  np[0]=2; np[1]=2; np[2] = 1;

  verbose = 0;
  for(int t=0; t<3; t++){
    gc_below[t] = 0;
    gc_above[t] = 0;
  }
  gc_below[0] = 0;
  gc_above[0] = 4;

  /* set values by commandline */
  init_parameters(argc, argv, n, np, gc_below, gc_above, &verbose);

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  /* alloc_local, local_no, local_o_start are given in complex units */
  /* local_ni, local_i_start are given in real units */
  alloc_local = pfft_local_size_dft_r2c_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* alloc_local_gc, local_ngc, local_gc_start are given in complex units */
  alloc_local_gc = pfft_local_size_gc_3d(
      local_no, local_o_start, gc_below, gc_above,
      local_ngc, local_gc_start);

  /* Allocate enough memory for FFT and ghost cells */
  cdata = pfft_alloc_complex(alloc_local_gc > alloc_local ? alloc_local_gc : alloc_local);

  /* Plan parallel ghost cell send */
  ths = pfft_plan_cgc_3d(n, gc_below, gc_above,
      cdata, comm_cart_2d, PFFT_GC_TRANSPOSED_NONE | PFFT_GC_R2C);

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_no, local_o_start,
      cdata);

  /* check gcell input */
  if(verbose)
    pfft_apr_complex_3d(cdata, local_no, local_o_start, "gcell input", comm_cart_2d);

  /* Execute parallel ghost cell send */
  pfft_exchange(ths);

  /* Check gcell output */
  if(verbose)
    pfft_apr_complex_3d(cdata, local_ngc, local_gc_start, "exchanged gcells", comm_cart_2d);
  
  /* Execute adjoint parallel ghost cell send */
  pfft_reduce(ths);

  /* check input */
  if(verbose)
    pfft_apr_complex_3d(cdata, local_no, local_o_start, "reduced gcells", comm_cart_2d);

  /* Scale data */
  for(ptrdiff_t l=0; l < local_no[0] * local_no[1] * local_no[2]; l++)
    cdata[l] /= 2;

  /* Print error of back transformed data */
  MPI_Barrier(comm_cart_2d);
  err = pfft_check_output_complex_3d(n, local_no, local_o_start, cdata, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one gcell exchange and reduce of logical size n=(%td, %td, %td),\n", n[0], n[1], n[2]); 
  pfft_printf(comm_cart_2d, "physical size pn=(%td, %td, %td),\n", n[0], n[1], n[2]/2+1); 
  pfft_printf(comm_cart_2d, "gc_below = (%td, %td, %td), gc_above = (%td, %td, %td):\n", gc_below[0], gc_below[1], gc_below[2], gc_above[0], gc_above[1], gc_above[2]); 
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);


  /* free mem and finalize */
  pfft_destroy_gcplan(ths);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(cdata);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 29
0
int main(int argc, char *argv[])
{
    int errs = 0;
    int attrval;
    int i, key[32], keyval, saveKeyval;
    MPI_Comm comm, dupcomm;
    MTest_Init(&argc, &argv);

    while (MTestGetIntracomm(&comm, 1)) {
        if (comm == MPI_COMM_NULL)
            continue;

        MPI_Comm_create_keyval(copy_fn, delete_fn, &keyval, (void *) 0);
        saveKeyval = keyval;    /* in case we need to free explicitly */
        attrval = 1;
        MPI_Comm_set_attr(comm, keyval, (void *) &attrval);
        /* See MPI-1, 5.7.1.  Freeing the keyval does not remove it if it
         * is in use in an attribute */
        MPI_Comm_free_keyval(&keyval);

        /* We create some dummy keyvals here in case the same keyval
         * is reused */
        for (i = 0; i < 32; i++) {
            MPI_Comm_create_keyval(MPI_NULL_COPY_FN, MPI_NULL_DELETE_FN, &key[i], (void *) 0);
        }

        MPI_Comm_dup(comm, &dupcomm);
        /* Check that the attribute was copied */
        if (attrval != 2) {
            errs++;
            printf("Attribute not incremented when comm dup'ed (%s)\n", MTestGetIntracommName());
        }
        MPI_Comm_free(&dupcomm);
        if (attrval != 1) {
            errs++;
            printf("Attribute not decremented when dupcomm %s freed\n", MTestGetIntracommName());
        }
        /* Check that the attribute was freed in the dupcomm */

        if (comm != MPI_COMM_WORLD && comm != MPI_COMM_SELF) {
            MPI_Comm_free(&comm);
            /* Check that the original attribute was freed */
            if (attrval != 0) {
                errs++;
                printf("Attribute not decremented when comm %s freed\n", MTestGetIntracommName());
            }
        }
        else {
            /* Explicitly delete the attributes from world and self */
            MPI_Comm_delete_attr(comm, saveKeyval);
        }
        /* Free those other keyvals */
        for (i = 0; i < 32; i++) {
            MPI_Comm_free_keyval(&key[i]);
        }
    }
    MTest_Finalize(errs);
    MPI_Finalize();

    /* The attributes on comm self and world were deleted by finalize
     * (see separate test) */

    return 0;

}
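/* copy_fn and delete_fn are defined outside this excerpt.  A sketch of a pair
 * consistent with the checks above (attrval rises to 2 on MPI_Comm_dup and
 * falls as communicators are freed); the original test's definitions may
 * differ in detail. */
static int copy_fn(MPI_Comm oldcomm, int keyval, void *extra_state,
                   void *attribute_val_in, void *attribute_val_out, int *flag)
{
    /* attach the same counter to the duplicated communicator and count the copy */
    *(void **) attribute_val_out = attribute_val_in;
    *(int *) attribute_val_in += 1;
    *flag = 1;
    return MPI_SUCCESS;
}

static int delete_fn(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state)
{
    /* count the deletion */
    *(int *) attribute_val -= 1;
    return MPI_SUCCESS;
}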
Ejemplo n.º 30
0
/* This test spawns two child jobs and has them open a port and connect to 
 * each other.
 * The two children repeatedly connect, accept, and disconnect from each other.
 */
int main(int argc, char *argv[])
{
    int error;
    int rank, size;
    int numprocs = 3;
    char *argv1[2] = { (char*)"connector", NULL };
    char *argv2[2] = { (char*)"acceptor", NULL };
    MPI_Comm comm_connector, comm_acceptor, comm_parent, comm;
    char port[MPI_MAX_PORT_NAME] = {0};
    MPI_Status status;
    MPI_Info spawn_path = MPI_INFO_NULL;
    int i, num_loops = 100;
    int data;
    int verbose = 0;

    if (getenv("MPITEST_VERBOSE"))
    {
	verbose = 1;
    }

    IF_VERBOSE(("init.\n"));
    error = MPI_Init(&argc, &argv);
    check_error(error, "MPI_Init");

    IF_VERBOSE(("size.\n"));
    error = MPI_Comm_size(MPI_COMM_WORLD, &size);
    check_error(error, "MPI_Comm_size");

    IF_VERBOSE(("rank.\n"));
    error = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    check_error(error, "MPI_Comm_rank");

    if (argc == 1)
    {
	/* Make sure that the current directory is in the path.
	   Not all implementations may honor or understand this, but
	   it is highly recommended as it gives users a clean way
	   to specify the location of the executable without
	   specifying a particular directory format (e.g., this 
	   should work with both Windows and Unix implementations) */
	MPI_Info_create( &spawn_path );
	MPI_Info_set( spawn_path, (char*)"path", (char*)"." );

	IF_VERBOSE(("spawn connector.\n"));
	error = MPI_Comm_spawn((char*)"disconnect_reconnect2",
			       argv1, numprocs, spawn_path, 0, 
			       MPI_COMM_WORLD, &comm_connector, 
			       MPI_ERRCODES_IGNORE);
	check_error(error, "MPI_Comm_spawn");

	IF_VERBOSE(("spawn acceptor.\n"));
	error = MPI_Comm_spawn((char*)"disconnect_reconnect2",
			       argv2, numprocs, spawn_path, 0, 
			       MPI_COMM_WORLD, &comm_acceptor, 
			       MPI_ERRCODES_IGNORE);
	check_error(error, "MPI_Comm_spawn");
	MPI_Info_free( &spawn_path );

	if (rank == 0)
	{
	    IF_VERBOSE(("recv port.\n"));
	    error = MPI_Recv(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, 
		comm_acceptor, &status);
	    check_error(error, "MPI_Recv");

	    IF_VERBOSE(("send port.\n"));
	    error = MPI_Send(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, 
		comm_connector);
	    check_error(error, "MPI_Send");
	}

	IF_VERBOSE(("barrier acceptor.\n"));
	error = MPI_Barrier(comm_acceptor);
	check_error(error, "MPI_Barrier");

	IF_VERBOSE(("barrier connector.\n"));
	error = MPI_Barrier(comm_connector);
	check_error(error, "MPI_Barrier");

        error = MPI_Comm_free(&comm_acceptor);
	check_error(error, "MPI_Comm_free");
        error = MPI_Comm_free(&comm_connector);
	check_error(error, "MPI_Comm_free");

	if (rank == 0)
	{
	    printf(" No Errors\n");
	    fflush(stdout);
	}
    }
    else if ((argc == 2) && (strcmp(argv[1], "acceptor") == 0))
    {
	IF_VERBOSE(("get_parent.\n"));
	error = MPI_Comm_get_parent(&comm_parent);
	check_error(error, "MPI_Comm_get_parent");
	if (comm_parent == MPI_COMM_NULL)
	{
	    printf("acceptor's parent is NULL.\n");fflush(stdout);
	    MPI_Abort(MPI_COMM_WORLD, -1);
	}
	if (rank == 0)
	{
	    IF_VERBOSE(("open_port.\n"));
	    error = MPI_Open_port(MPI_INFO_NULL, port);
	    check_error(error, "MPI_Open_port");

	    IF_VERBOSE(("0: opened port: <%s>\n", port));
	    IF_VERBOSE(("send.\n"));
	    error = MPI_Send(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, comm_parent);
	    check_error(error, "MPI_Send");
	}

	for (i=0; i<num_loops; i++)
	{
	    IF_VERBOSE(("accept.\n"));
	    error = MPI_Comm_accept(port, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &comm);
	    check_error(error, "MPI_Comm_accept");

	    if (rank == 0)
	    {
		data = i;
		error = MPI_Send(&data, 1, MPI_INT, 0, 0, comm);
		check_error(error, "MPI_Send");
		error = MPI_Recv(&data, 1, MPI_INT, 0, 0, comm, &status);
		check_error(error, "MPI_Recv");
		if (data != i)
		{
		    printf("expected %d but received %d\n", i, data);
		    fflush(stdout);
		    MPI_Abort(MPI_COMM_WORLD, 1);
		}
	    }

	    IF_VERBOSE(("disconnect.\n"));
	    error = MPI_Comm_disconnect(&comm);
	    check_error(error, "MPI_Comm_disconnect");
	}

	if (rank == 0)
	{
	    IF_VERBOSE(("close_port.\n"));
	    error = MPI_Close_port(port);
	    check_error(error, "MPI_Close_port");
	}

	IF_VERBOSE(("barrier.\n"));
	error = MPI_Barrier(comm_parent);
	check_error(error, "MPI_Barrier");

	MPI_Comm_free( &comm_parent );
    }
    else if ((argc == 2) && (strcmp(argv[1], "connector") == 0))
    {
	IF_VERBOSE(("get_parent.\n"));
	error = MPI_Comm_get_parent(&comm_parent);
	check_error(error, "MPI_Comm_get_parent");
	if (comm_parent == MPI_COMM_NULL)
	{
	    printf("acceptor's parent is NULL.\n");fflush(stdout);
	    MPI_Abort(MPI_COMM_WORLD, -1);
	}

	if (rank == 0)
	{
	    IF_VERBOSE(("recv.\n"));
	    error = MPI_Recv(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, 
		comm_parent, &status);
	    check_error(error, "MPI_Recv");
	    IF_VERBOSE(("1: received port: <%s>\n", port));
	}

	for (i=0; i<num_loops; i++)
	{
	    IF_VERBOSE(("connect.\n"));
	    error = MPI_Comm_connect(port, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &comm);
	    check_error(error, "MPI_Comm_connect");

	    if (rank == 0)
	    {
		data = -1;
		error = MPI_Recv(&data, 1, MPI_INT, 0, 0, comm, &status);
		check_error(error, "MPI_Recv");
		if (data != i)
		{
		    printf("expected %d but received %d\n", i, data);
		    fflush(stdout);
		    MPI_Abort(MPI_COMM_WORLD, 1);
		}
		error = MPI_Send(&data, 1, MPI_INT, 0, 0, comm);
		check_error(error, "MPI_Send");
	    }

	    IF_VERBOSE(("disconnect.\n"));
	    error = MPI_Comm_disconnect(&comm);
	    check_error(error, "MPI_Comm_disconnect");
	}

	IF_VERBOSE(("barrier.\n"));
	error = MPI_Barrier(comm_parent);
	check_error(error, "MPI_Barrier");

	MPI_Comm_free( &comm_parent );
    }
    else
    {
	printf("invalid command line.\n");fflush(stdout);
	{
	    int ii;
	    for (ii=0; ii<argc; ii++)
	    {
		printf("argv[%d] = <%s>\n", ii, argv[ii]);
	    }
	}
	fflush(stdout);
	MPI_Abort(MPI_COMM_WORLD, -2);
    }

    MPI_Finalize();
    return 0;
}