void sendp2() { static MPI_Request req[4]; static int flag = 0; if(! flag){ #pragma acc data present(sendp2_lo_recvbuf[0:imax*kmax], sendp2_lo_sendbuf[0:imax*kmax], sendp2_hi_sendbuf[0:imax*kmax], sendp2_hi_recvbuf[0:imax*kmax]) #pragma acc host_data use_device(sendp2_lo_sendbuf, sendp2_lo_recvbuf, sendp2_hi_sendbuf, sendp2_hi_recvbuf) { MPI_Recv_init(sendp2_hi_recvbuf, 1, ikvec, npy[1], 1, mpi_comm_cart, req+2); MPI_Recv_init(sendp2_lo_recvbuf, 1, ikvec, npy[0], 2, mpi_comm_cart, req+0); MPI_Send_init(sendp2_hi_sendbuf, 1, ikvec, npy[0], 1, mpi_comm_cart, req+3); MPI_Send_init(sendp2_lo_sendbuf, 1, ikvec, npy[1], 2, mpi_comm_cart, req+1); flag = 1; } } sendp2_pack(); MPI_Startall(4, req); MPI_Waitall(4, req, MPI_STATUSES_IGNORE); sendp2_unpack(); }
void sendp1() { static MPI_Request req[4]; static int flag = 0; if(! flag){ #pragma acc data present(p) #pragma acc host_data use_device(p) { MPI_Recv_init(&p[imax-1][0][0], 1, jkvec, npx[1], 1, mpi_comm_cart, req+2); MPI_Recv_init(&p[0][0][0], 1, jkvec, npx[0], 2, mpi_comm_cart, req+0); MPI_Send_init(&p[1][0][0], 1, jkvec, npx[0], 1, mpi_comm_cart, req+3); MPI_Send_init(&p[imax-2][0][0], 1, jkvec, npx[1], 2, mpi_comm_cart, req+1); flag = 1; } } MPI_Startall(4, req); MPI_Waitall(4, req, MPI_STATUSES_IGNORE); }
void mpi_send_init_(void *buf, int* count, int* datatype, int* dst, int* tag, int* comm, int* request, int* ierr) { MPI_Request req; *ierr = MPI_Send_init(buf, *count, get_datatype(*datatype), *dst, *tag, get_comm(*comm), &req); if(*ierr == MPI_SUCCESS) { *request = new_request(req); } }
/** * Declare a message handle for sending to a node displaced in (x,y,z,t) according to "displacement" */ MsgHandle *comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes) { Topology *topo = comm_default_topology(); int rank = comm_rank_displaced(topo, displacement); int tag = comm_rank(); MsgHandle *mh = (MsgHandle *)safe_malloc(sizeof(MsgHandle)); MPI_CHECK( MPI_Send_init(buffer, nbytes, MPI_BYTE, rank, tag, MPI_COMM_WORLD, &(mh->request)) ); return mh; }
/* * This example causes the IBM SP2 MPI version to generate the message * ERROR: 0032-158 Persistent request already active (2) in MPI_Startall, task 0 * in the SECOND set of MPI_Startall (after the MPI_Request_free). */ int main( int argc, char **argv ) { MPI_Request r[4]; MPI_Status statuses[4]; double sbuf1[10], sbuf2[10]; double rbuf1[10], rbuf2[10]; int size, rank, up_nbr, down_nbr, i; MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); up_nbr = (rank + 1) % size; down_nbr = (size + rank - 1) % size; MPI_Recv_init( rbuf1, 10, MPI_DOUBLE, down_nbr, 0, MPI_COMM_WORLD, &r[0] ); MPI_Recv_init( rbuf2, 10, MPI_DOUBLE, up_nbr, 1, MPI_COMM_WORLD, &r[1] ); MPI_Send_init( sbuf1, 10, MPI_DOUBLE, up_nbr, 0, MPI_COMM_WORLD, &r[2] ); MPI_Send_init( sbuf2, 10, MPI_DOUBLE, down_nbr, 1, MPI_COMM_WORLD, &r[3] ); MPI_Startall( 4, r ); MPI_Waitall( 4, r, statuses ); for (i=0; i<4; i++) { MPI_Request_free( &r[i] ); } MPI_Recv_init( rbuf1, 10, MPI_DOUBLE, down_nbr, 0, MPI_COMM_WORLD, &r[0] ); MPI_Recv_init( rbuf2, 10, MPI_DOUBLE, up_nbr, 1, MPI_COMM_WORLD, &r[1] ); MPI_Send_init( sbuf1, 10, MPI_DOUBLE, up_nbr, 0, MPI_COMM_WORLD, &r[2] ); MPI_Send_init( sbuf2, 10, MPI_DOUBLE, down_nbr, 1, MPI_COMM_WORLD, &r[3] ); MPI_Startall( 4, r ); MPI_Waitall( 4, r, statuses ); for (i=0; i<4; i++) { MPI_Request_free( &r[i] ); } if (rank == 0) printf( "No errors\n" ); MPI_Finalize(); return 0; }
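/* For contrast with the test above: a minimal sketch (not taken from the original sources) of the usual persistent-request reuse pattern, in which the requests are created once, restarted each iteration with MPI_Startall, and freed only after the final MPI_Waitall. NITER and the ring neighbours are illustrative assumptions. */
#include <mpi.h>
#define NITER 10
int main( int argc, char **argv )
{
    MPI_Request r[4];
    double sbuf1[10], sbuf2[10], rbuf1[10], rbuf2[10];
    int size, rank, up_nbr, down_nbr, i;
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    up_nbr = (rank + 1) % size;
    down_nbr = (size + rank - 1) % size;
    /* initialize the persistent requests once */
    MPI_Recv_init( rbuf1, 10, MPI_DOUBLE, down_nbr, 0, MPI_COMM_WORLD, &r[0] );
    MPI_Recv_init( rbuf2, 10, MPI_DOUBLE, up_nbr,   1, MPI_COMM_WORLD, &r[1] );
    MPI_Send_init( sbuf1, 10, MPI_DOUBLE, up_nbr,   0, MPI_COMM_WORLD, &r[2] );
    MPI_Send_init( sbuf2, 10, MPI_DOUBLE, down_nbr, 1, MPI_COMM_WORLD, &r[3] );
    for (i = 0; i < NITER; i++) {
        /* a request may only be (re)started while inactive, i.e. after the previous Waitall has completed it */
        MPI_Startall( 4, r );
        MPI_Waitall( 4, r, MPI_STATUSES_IGNORE );
    }
    /* free only once the requests are inactive again */
    for (i = 0; i < 4; i++) MPI_Request_free( &r[i] );
    MPI_Finalize();
    return 0;
}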
HYPRE_Int hypre_MPI_Send_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Send_init(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, comm, request); }
/** Send to the "dir" direction in the "dim" dimension */ void* comm_declare_send_relative(void *buffer, int dim, int dir, size_t count) { int back_nbr[4] = {X_BACK_NBR,Y_BACK_NBR,Z_BACK_NBR,T_BACK_NBR}; int fwd_nbr[4] = {X_FWD_NBR,Y_FWD_NBR,Z_FWD_NBR,T_FWD_NBR}; int downtags[4] = {XDOWN, YDOWN, ZDOWN, TDOWN}; int uptags[4] = {XUP, YUP, ZUP, TUP}; MPI_Request *request = (MPI_Request*)safe_malloc(sizeof(MPI_Request)); int tag = (dir == 1) ? uptags[dim] : downtags[dim]; int dst = (dir == 1) ? fwd_nbr[dim] : back_nbr[dim]; int dstproc = find_neighbor_proc(dst); MPI_Send_init(buffer, count, MPI_BYTE, dstproc, tag, MPI_COMM_WORLD, request); return (void*)request; }
int main(int argc, char **argv) { int rank, touch; if (argc != 2) { fprintf(stderr, "Invalid arg\n"); return -1; } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); int *buffer = (int*) malloc(127 * sizeof(int)); MPI_Request r; if (!strcmp(argv[1], "recv")) { MPI_Recv(buffer, 128, MPI_INT, 0, 10, MPI_COMM_WORLD, MPI_STATUSES_IGNORE); } if (!strcmp(argv[1], "send")) { MPI_Send(buffer, 128, MPI_INT, 0, 10, MPI_COMM_WORLD); } if (!strcmp(argv[1], "recv-lock")) { MPI_Request r; MPI_Irecv(buffer, 10, MPI_INT, 0, 10, MPI_COMM_WORLD, &r); MPI_Recv(buffer, 128, MPI_INT, 0, 10, MPI_COMM_WORLD, MPI_STATUSES_IGNORE); } if (!strcmp(argv[1], "send-lock")) { MPI_Request r; MPI_Irecv(buffer, 10, MPI_INT, 0, 10, MPI_COMM_WORLD, &r); MPI_Send(buffer, 128, MPI_INT, 0, 10, MPI_COMM_WORLD); } if (!strcmp(argv[1], "persistent-recv")) { MPI_Request r; MPI_Recv_init(buffer, 10, MPI_INT, 0, 10, MPI_COMM_WORLD, &r); free(buffer); MPI_Start(&r); } if (!strcmp(argv[1], "persistent-send")) { MPI_Request r; MPI_Send_init(buffer, 10, MPI_INT, 0, 10, MPI_COMM_WORLD, &r); free(buffer); MPI_Start(&r); } return 0; }
void create_right_communication(){ int send_to_coords[2]; int send_to_rank; int recv_from_coords[2]; int recv_from_rank; send_to_coords[0] = my_coords[0]; send_to_coords[1] = my_coords[1] + 1; recv_from_coords[0] = my_coords[0]; recv_from_coords[1] = my_coords[1] - 1; if(is_valid_coord(send_to_coords)){ MPI_Cart_rank( grid_comm, send_to_coords, &send_to_rank ); MPI_Send_init( &grid[n], 1, column_t, send_to_rank, TAG, grid_comm, &horizontal_reqs[horizontal_req_index++] ); } recv_right_buffer = (int*) calloc(m+2, sizeof(int)); //initialize to 0 if(is_valid_coord(recv_from_coords)){ MPI_Cart_rank( grid_comm, recv_from_coords, &recv_from_rank ); MPI_Recv_init( recv_right_buffer, m + 2, MPI_INT, recv_from_rank, TAG, grid_comm, &horizontal_reqs[horizontal_req_index++] ); } }
void create_down_communication(){ int send_to_coords[2]; int send_to_rank; int recv_from_coords[2]; int recv_from_rank; send_to_coords[0] = my_coords[0] + 1; send_to_coords[1] = my_coords[1]; recv_from_coords[0] = my_coords[0] - 1; recv_from_coords[1] = my_coords[1]; if(is_valid_coord(send_to_coords)){ MPI_Cart_rank( grid_comm, send_to_coords, &send_to_rank ); MPI_Send_init( &grid[((n+2)*(m+2)) - (2 * n + 3)],//n+3], n, MPI_INT, send_to_rank, TAG, grid_comm, &vertical_reqs[vertical_req_index++] ); } if(is_valid_coord(recv_from_coords)){ MPI_Cart_rank( grid_comm, recv_from_coords, &recv_from_rank ); MPI_Recv_init( &grid[1], n, MPI_INT, recv_from_rank, TAG, grid_comm, &vertical_reqs[vertical_req_index++] ); } }
/** * Declare a message handle for sending to a node displaced in (x,y,z,t) according to "displacement" */ MsgHandle *comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride) { Topology *topo = comm_default_topology(); int rank = comm_rank_displaced(topo, displacement); int tag = comm_rank(); MsgHandle *mh = (MsgHandle *)safe_malloc(sizeof(MsgHandle)); // create a new strided MPI type MPI_CHECK( MPI_Type_vector(nblocks, blksize, stride, MPI_BYTE, &(mh->datatype)) ); MPI_CHECK( MPI_Type_commit(&(mh->datatype)) ); MPI_CHECK( MPI_Send_init(buffer, 1, mh->datatype, rank, tag, MPI_COMM_WORLD, &(mh->request)) ); return mh; }
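/* A hypothetical usage sketch (not part of the sources above), assuming the MsgHandle type and MPI_CHECK macro from the preceding snippet, with 'request' and 'datatype' filled in by comm_declare_strided_send_displaced(): the persistent send is started and completed with plain MPI calls, and the request and committed strided datatype are released when the handle is retired. */
void start_strided_send(MsgHandle *mh)
{
  MPI_CHECK( MPI_Start(&(mh->request)) );                   /* begin the persistent send */
}
void wait_strided_send(MsgHandle *mh)
{
  MPI_CHECK( MPI_Wait(&(mh->request), MPI_STATUS_IGNORE) ); /* the buffer may be reused once this returns */
}
void free_strided_send(MsgHandle *mh)
{
  MPI_CHECK( MPI_Request_free(&(mh->request)) );            /* release the persistent request */
  MPI_CHECK( MPI_Type_free(&(mh->datatype)) );              /* release the strided datatype */
}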
void ompi_send_init_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; MPI_Datatype c_type = MPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; c_comm = MPI_Comm_f2c (*comm); c_ierr = MPI_Send_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { *request = MPI_Request_c2f(c_req); } }
int main( int argc, char **argv ) { MPI_Request r1; int size, rank; int err = 0; int partner, buf[10], flag, idx, index; MPI_Status status; MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); if (size < 2) { printf( "Cancel test requires at least 2 processes\n" ); MPI_Abort( MPI_COMM_WORLD, 1 ); } /* * Here is the test. First, we ensure an unsatisfied Irecv: * process 0 process size-1 * Sendrecv Sendrecv * Irecv ---- * Cancel ---- * Sendrecv Sendrecv * Next, we confirm receipt before canceling * Irecv Send * Sendrecv Sendrecv * Cancel */ if (rank == 0) { partner = size - 1; /* Cancel succeeds for wait/waitall */ MPI_Send_init( buf, 10, MPI_INT, partner, 0, MPI_COMM_WORLD, &r1 ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Start( &r1 ); MPI_Cancel( &r1 ); MPI_Wait( &r1, &status ); MPI_Test_cancelled( &status, &flag ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); if (!flag) { err++; printf( "Cancel of a send failed where it should succeed (Wait).\n" ); } MPI_Request_free( &r1 ); /* Cancel fails for test/testall */ buf[0] = 3; MPI_Send_init( buf, 3, MPI_INT, partner, 2, MPI_COMM_WORLD, &r1 ); MPI_Start( &r1 ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Cancel( &r1 ); MPI_Test( &r1, &flag, &status ); MPI_Test_cancelled( &status, &flag ); if (flag) { err++; printf( "Cancel of a send succeeded where it shouldn't (Test).\n" ); } MPI_Request_free( &r1 ); /* Cancel succeeds for waitany */ MPI_Send_init( buf, 10, MPI_INT, partner, 0, MPI_COMM_WORLD, &r1 ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Start( &r1 ); MPI_Cancel( &r1 ); MPI_Waitany( 1, &r1, &idx, &status ); MPI_Test_cancelled( &status, &flag ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); if (!flag) { err++; printf( "Cancel of a send failed where it should succeed (Waitany).\n" ); } MPI_Request_free( &r1 ); /* Cancel fails for testany */ buf[0] = 3; MPI_Send_init( buf, 3, MPI_INT, partner, 2, MPI_COMM_WORLD, &r1 ); MPI_Start( &r1 ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Cancel( &r1 ); MPI_Testany( 1, &r1, &idx, &flag, &status ); MPI_Test_cancelled( &status, &flag ); if (flag) { err++; printf( "Cancel of a send succeeded where it shouldn't (Testany).\n" ); } MPI_Request_free( &r1 ); /* Cancel succeeds for waitsome */ MPI_Send_init( buf, 10, MPI_INT, partner, 0, MPI_COMM_WORLD, &r1 ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Start( &r1 ); MPI_Cancel( &r1 ); MPI_Waitsome( 1, &r1, &idx, &index, &status ); MPI_Test_cancelled( &status, &flag ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); if (!flag) { err++; printf( "Cancel of a send failed where it should succeed (Waitsome).\n" ); } MPI_Request_free( &r1 ); /* Cancel fails for testsome*/ buf[0] = 3; MPI_Send_init( buf, 3, MPI_INT, partner, 2, MPI_COMM_WORLD, &r1 ); MPI_Start( &r1 ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Cancel( &r1 ); MPI_Testsome( 1, &r1, &idx, &index, &status ); 
MPI_Test_cancelled( &status, &flag ); if (flag) { err++; printf( "Cancel of a send succeeded where it shouldn't (Testsome).\n" ); } MPI_Request_free( &r1 ); if (err) { printf( "Test failed with %d errors.\n", err ); } else { printf( "Test passed\n" ); } } else if (rank == size - 1) { partner = 0; /* Cancel succeeds for wait/waitall */ MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); /* Cancel fails for test/testall */ buf[0] = -1; MPI_Recv( buf, 3, MPI_INT, partner, 2, MPI_COMM_WORLD, &status ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); if (buf[0] == -1) { printf( "Receive buffer did not change even though cancel should not have succeeded! (Test).\n" ); } /* Cancel succeeds for waitany */ MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); /* Cancel fails for testany */ buf[0] = -1; MPI_Recv( buf, 3, MPI_INT, partner, 2, MPI_COMM_WORLD, &status ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); if (buf[0] == -1) { printf( "Receive buffer did not change even though cancel should not have succeeded! (Testany).\n" ); } /* Cancel succeeds for waitsome */ MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); /* Cancel fails for testsome */ buf[0] = -1; MPI_Recv( buf, 3, MPI_INT, partner, 2, MPI_COMM_WORLD, &status ); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_BOTTOM, 0, MPI_INT, partner, 1, MPI_COMM_WORLD, &status ); if (buf[0] == -1) { printf( "Receive buffer did not change even though cancel should not have succeeded! (Testsome).\n" ); } } MPI_Finalize(); return 0; }
int main (int argc, char *argv[]) { int ierr; int rank; static char buffer[80]; MPI_Request req = MPI_REQUEST_NULL; MPI_Status status, status2; #ifdef V_T double ts; int messageframe; #endif ierr = MPI_Init(&argc,&argv); #ifdef V_T ts = VT_timestamp(); #endif /* this used to be buggy... */ MPI_Wait( &req, &status ); ierr = MPI_Barrier(MPI_COMM_WORLD); test_pair(); MPI_Comm_rank ( MPI_COMM_WORLD, &rank ); if ( getenv ("VT_ABORT_BEFORE_FINALIZE") ) { if ( atoi ( getenv ("VT_ABORT_BEFORE_FINALIZE") ) < 2 ) MPI_Abort( MPI_COMM_WORLD, 10 ); if ( !rank ) { *((char *)NULL) = 0; } else { MPI_Barrier ( MPI_COMM_WORLD ); } } /* test some other aspects of message transfer: persistent send with MPI_PROC_NULL */ MPI_Send_init( &ierr, 1, MPI_INT, MPI_PROC_NULL, 100, MPI_COMM_WORLD, &req ); MPI_Start( &req ); MPI_Wait( &req, &status ); MPI_Start( &req ); MPI_Wait( &req, &status ); MPI_Request_free( &req ); /* persistent receive with MPI_PROC_NULL */ MPI_Recv_init( &ierr, 1, MPI_INT, MPI_PROC_NULL, 100, MPI_COMM_WORLD, &req ); MPI_Start( &req ); MPI_Wait( &req, &status ); MPI_Start( &req ); MPI_Wait( &req, &status ); MPI_Request_free( &req ); /* real reuse of persistent communication */ if( rank & 1 ) { MPI_Recv_init( buffer, sizeof(buffer), MPI_CHAR, rank^1, 101, MPI_COMM_WORLD, &req ); } else { MPI_Send_init( buffer, sizeof(buffer), MPI_CHAR, rank^1, 101, MPI_COMM_WORLD, &req ); } MPI_Start( &req ); MPI_Wait( &req, &status ); MPI_Start( &req ); MPI_Wait( &req, &status ); MPI_Request_free( &req ); /* send to MPI_PROC_NULL */ MPI_Send( buffer, sizeof(buffer), MPI_CHAR, MPI_PROC_NULL, 103, MPI_COMM_WORLD ); /* cancelled receive */ MPI_Irecv( buffer, sizeof(buffer), MPI_CHAR, rank^1, 105, MPI_COMM_WORLD, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status2 ); #ifdef V_T printf( "Time: %f\n", VT_timestamp()-ts ); #endif ierr = MPI_Finalize(); return ierr; }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; int comm = MPI_COMM_WORLD; char processor_name[128]; int namelen = 128; int buf[BUF_SIZE * 2]; int i, j, k, index, outcount, flag; int indices[2]; MPI_Request aReq[2]; MPI_Status aStatus[2]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (comm, &nprocs); MPI_Comm_rank (comm, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); if (rank == 0) { /* set up persistent sends... */ MPI_Send_init (&buf[0], BUF_SIZE, MPI_INT, 1, 0, comm, &aReq[0]); MPI_Send_init (&buf[BUF_SIZE], BUF_SIZE, MPI_INT, 1, 1, comm, &aReq[1]); /* initialize the send buffers */ for (i = 0; i < BUF_SIZE; i++) { buf[i] = i; buf[BUF_SIZE + i] = BUF_SIZE - 1 - i; } } for (k = 0; k < 4; k++) { if (rank == 1) { /* zero out the receive buffers */ bzero (buf, sizeof(int) * BUF_SIZE * 2); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* start the persistent sends... */ if (k % 2) { MPI_Startall (2, &aReq[0]); } else { for (j = 0; j < 2; j++) { MPI_Start (&aReq[j]); } } /* complete the sends */ if (k < 2) { /* use MPI_Waitany */ for (j = 0; j < 2; j++) MPI_Waitany (2, aReq, &index, aStatus); } else { /* use MPI_Waitsome */ j = 0; while (j < 2) { MPI_Waitsome (2, aReq, &outcount, indices, aStatus); j += outcount; } } } else if (rank == 1) { /* set up receives for all of the sends */ for (j = 0; j < 2; j++) { MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE, MPI_INT, 0, j, comm, &aReq[j]); } /* complete all of the receives... */ MPI_Waitall (2, aReq, aStatus); } } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* free the persistent requests */ for (i = 0 ; i < 2; i++) { MPI_Request_free (&aReq[i]); } } MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
int main(int argc, char *argv[]) { MPI_Request r; MPI_Status s; // int flag; int buf[10]; int rbuf[10]; int tag = 27; int dest = 0; int rank, size; MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); /* Create a persistent send request */ // every rank prepares a send to rank 0 MPI_Send_init( buf, 10, MPI_INT, dest, tag, MPI_COMM_WORLD, &r ); /* Use that request */ if (rank == 0) { // allocate an array of size requests for the irecvs MPI_Request *rr = (MPI_Request *)malloc(size * sizeof(MPI_Request)); for (int i=0; i<size; i++) { // rank 0 will receive from every rank MPI_Irecv( rbuf, 10, MPI_INT, i, tag, MPI_COMM_WORLD, &rr[i] ); } // rank 0 will send to rank 0 MPI_Start( &r ); // rank 0 sends to rank 0 MPI_Wait( &r, &s ); // rank 0 receives from every rank MPI_Waitall( size, rr, MPI_STATUSES_IGNORE ); free(rr); } else { // non-zero ranks will send to rank 0 MPI_Start( &r ); // non-zero ranks send to rank 0 MPI_Wait( &r, &s ); } MPI_Request_free( &r ); // if (rank == 0) // { // MPI_Request sr; // /* Create a persistent receive request */ // // rank 0 prepares the receive from every rank // MPI_Recv_init( rbuf, 10, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &r ); // // rank 0 will send to rank 0 // MPI_Isend( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD, &sr ); // for (int i=0; i<size; i++) { // // rank 0 will receive from every rank // MPI_Start( &r ); // // rank 0 receives from every rank // MPI_Wait( &r, &s ); // } // // rank 0 sends to rank 0 // MPI_Wait( &sr, &s ); // MPI_Request_free( &r ); // } // else { // // non-zero ranks send to rank 0 // MPI_Send( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD ); // } MPI_Finalize(); return 0; }
void declareBindings (void) { /* === Point-to-point === */ void* buf; int count; MPI_Datatype datatype; int dest; int tag; MPI_Comm comm; MPI_Send (buf, count, datatype, dest, tag, comm); // L12 int source; MPI_Status status; MPI_Recv (buf, count, datatype, source, tag, comm, &status); // L15 MPI_Get_count (&status, datatype, &count); MPI_Bsend (buf, count, datatype, dest, tag, comm); MPI_Ssend (buf, count, datatype, dest, tag, comm); MPI_Rsend (buf, count, datatype, dest, tag, comm); void* buffer; int size; MPI_Buffer_attach (buffer, size); // L22 MPI_Buffer_detach (buffer, &size); MPI_Request request; MPI_Isend (buf, count, datatype, dest, tag, comm, &request); // L25 MPI_Ibsend (buf, count, datatype, dest, tag, comm, &request); MPI_Issend (buf, count, datatype, dest, tag, comm, &request); MPI_Irsend (buf, count, datatype, dest, tag, comm, &request); MPI_Irecv (buf, count, datatype, source, tag, comm, &request); MPI_Wait (&request, &status); int flag; MPI_Test (&request, &flag, &status); // L32 MPI_Request_free (&request); MPI_Request* array_of_requests; int index; MPI_Waitany (count, array_of_requests, &index, &status); // L36 MPI_Testany (count, array_of_requests, &index, &flag, &status); MPI_Status* array_of_statuses; MPI_Waitall (count, array_of_requests, array_of_statuses); // L39 MPI_Testall (count, array_of_requests, &flag, array_of_statuses); int incount; int outcount; int* array_of_indices; MPI_Waitsome (incount, array_of_requests, &outcount, array_of_indices, array_of_statuses); // L44--45 MPI_Testsome (incount, array_of_requests, &outcount, array_of_indices, array_of_statuses); // L46--47 MPI_Iprobe (source, tag, comm, &flag, &status); // L48 MPI_Probe (source, tag, comm, &status); MPI_Cancel (&request); MPI_Test_cancelled (&status, &flag); MPI_Send_init (buf, count, datatype, dest, tag, comm, &request); MPI_Bsend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Ssend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Rsend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Recv_init (buf, count, datatype, source, tag, comm, &request); MPI_Start (&request); MPI_Startall (count, array_of_requests); void* sendbuf; int sendcount; MPI_Datatype sendtype; int sendtag; void* recvbuf; int recvcount; MPI_Datatype recvtype; MPI_Datatype recvtag; MPI_Sendrecv (sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, &status); // L67--69 MPI_Sendrecv_replace (buf, count, datatype, dest, sendtag, source, recvtag, comm, &status); // L70--71 MPI_Datatype oldtype; MPI_Datatype newtype; MPI_Type_contiguous (count, oldtype, &newtype); // L74 int blocklength; { int stride; MPI_Type_vector (count, blocklength, stride, oldtype, &newtype); // L78 } { MPI_Aint stride; MPI_Type_hvector (count, blocklength, stride, oldtype, &newtype); // L82 } int* array_of_blocklengths; { int* array_of_displacements; MPI_Type_indexed (count, array_of_blocklengths, array_of_displacements, oldtype, &newtype); // L87--88 } { MPI_Aint* array_of_displacements; MPI_Type_hindexed (count, array_of_blocklengths, array_of_displacements, oldtype, &newtype); // L92--93 MPI_Datatype* array_of_types; MPI_Type_struct (count, array_of_blocklengths, array_of_displacements, array_of_types, &newtype); // L95--96 } void* location; MPI_Aint address; MPI_Address (location, &address); // L100 MPI_Aint extent; MPI_Type_extent (datatype, &extent); // L102 MPI_Type_size (datatype, &size); MPI_Aint displacement; MPI_Type_lb (datatype, &displacement); // L105 MPI_Type_ub 
(datatype, &displacement); MPI_Type_commit (&datatype); MPI_Type_free (&datatype); MPI_Get_elements (&status, datatype, &count); void* inbuf; void* outbuf; int outsize; int position; MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114 int insize; MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype, comm); // L116--117 MPI_Pack_size (incount, datatype, comm, &size); /* === Collectives === */ MPI_Barrier (comm); // L121 int root; MPI_Bcast (buffer, count, datatype, root, comm); // L123 MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); // L124--125 int* recvcounts; int* displs; MPI_Gatherv (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm); // L128--130 MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); // L131--132 int* sendcounts; MPI_Scatterv (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm); // L134--135 MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); // L136--137 MPI_Allgatherv (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); // L138--140 MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); // L141--142 int* sdispls; int* rdispls; MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); // L145--147 MPI_Op op; MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149 #if 0 MPI_User_function function; int commute; MPI_Op_create (function, commute, &op); // L153 #endif MPI_Op_free (&op); // L155 MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm); MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm); MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm); /* === Groups, contexts, and communicators === */ MPI_Group group; MPI_Group_size (group, &size); // L162 int rank; MPI_Group_rank (group, &rank); // L164 MPI_Group group1; int n; int* ranks1; MPI_Group group2; int* ranks2; MPI_Group_translate_ranks (group1, n, ranks1, group2, ranks2); // L170 int result; MPI_Group_compare (group1, group2, &result); // L172 MPI_Group newgroup; MPI_Group_union (group1, group2, &newgroup); // L174 MPI_Group_intersection (group1, group2, &newgroup); MPI_Group_difference (group1, group2, &newgroup); int* ranks; MPI_Group_incl (group, n, ranks, &newgroup); // L178 MPI_Group_excl (group, n, ranks, &newgroup); extern int ranges[][3]; MPI_Group_range_incl (group, n, ranges, &newgroup); // L181 MPI_Group_range_excl (group, n, ranges, &newgroup); MPI_Group_free (&group); MPI_Comm_size (comm, &size); MPI_Comm_rank (comm, &rank); MPI_Comm comm1; MPI_Comm comm2; MPI_Comm_compare (comm1, comm2, &result); MPI_Comm newcomm; MPI_Comm_dup (comm, &newcomm); MPI_Comm_create (comm, group, &newcomm); int color; int key; MPI_Comm_split (comm, color, key, &newcomm); // L194 MPI_Comm_free (&comm); MPI_Comm_test_inter (comm, &flag); MPI_Comm_remote_size (comm, &size); MPI_Comm_remote_group (comm, &group); MPI_Comm local_comm; int local_leader; MPI_Comm peer_comm; int remote_leader; MPI_Comm newintercomm; MPI_Intercomm_create (local_comm, local_leader, peer_comm, remote_leader, tag, &newintercomm); // L204--205 MPI_Comm intercomm; MPI_Comm newintracomm; int high; MPI_Intercomm_merge (intercomm, high, &newintracomm); // L209 int keyval; #if 0 MPI_Copy_function copy_fn; MPI_Delete_function delete_fn; void* extra_state; MPI_Keyval_create (copy_fn, delete_fn, &keyval, extra_state); // L215 
#endif MPI_Keyval_free (&keyval); // L217 void* attribute_val; MPI_Attr_put (comm, keyval, attribute_val); // L219 MPI_Attr_get (comm, keyval, attribute_val, &flag); MPI_Attr_delete (comm, keyval); /* === Environmental inquiry === */ char* name; int resultlen; MPI_Get_processor_name (name, &resultlen); // L226 MPI_Errhandler errhandler; #if 0 MPI_Handler_function function; MPI_Errhandler_create (function, &errhandler); // L230 #endif MPI_Errhandler_set (comm, errhandler); // L232 MPI_Errhandler_get (comm, &errhandler); MPI_Errhandler_free (&errhandler); int errorcode; char* string; MPI_Error_string (errorcode, string, &resultlen); // L237 int errorclass; MPI_Error_class (errorcode, &errorclass); // L239 MPI_Wtime (); MPI_Wtick (); int argc; char** argv; MPI_Init (&argc, &argv); // L244 MPI_Finalize (); MPI_Initialized (&flag); MPI_Abort (comm, errorcode); }
int main (int argc, char *argv[]) { MPI_Request reqSR[4], reqRR[4], reqSF[4], reqRF[4]; MPI_Status statRR[4], statRF[4], statSR[4], statSF[4]; MPI_Comm cartcomm; int n_proc, nbrs[4], dims[2], periods[2]={1,1}, reorder=1; int landNS, landWE, err,i; float sumFox, sumRabb, nbrab, nbfox, model[2][3]; double time; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &n_proc); if(rank==0){ time= MPI_Wtime(); printf("N_proc:%d",n_proc); } /**************************************************** ********** SINGLE-PROCESS CASE ****************** ***************************************************/ if (n_proc==1) { echoSingle(); }else{ /**************************************************** **********+++ MULTIPLE PROCESSES ****************** ***************************************************/ int lado = sqrt(n_proc); dims[0] = lado; dims[1] = lado; if((lado * lado) != n_proc){ if(rank==0) printf("ERROR: Incorrect number of processes\n"); MPI_Finalize(); exit(0); } MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, reorder, &cartcomm); MPI_Comm_rank(cartcomm, &rank); MPI_Cart_coords(cartcomm, rank, 2, coords); MPI_Cart_shift(cartcomm, 0, 1, &nbrs[UP], &nbrs[DOWN]); MPI_Cart_shift(cartcomm, 1, 1, &nbrs[LEFT], &nbrs[RIGHT]); //Update each process's offsets landNS = offsetNS = NS_Size / lado; landWE = offsetWE = WE_Size / lado; if(coords[0] == (lado-1)){ offsetNS += NS_Size % lado; } if(coords[1] == (lado-1)){ offsetWE += WE_Size % lado; } //Buffers for sending and receiving data float buf_sendFoxN[offsetWE],buf_sendFoxS[offsetWE],buf_sendFoxW[offsetNS],buf_sendFoxE[offsetNS]; float buf_recvFoxN[offsetWE],buf_recvFoxS[offsetWE],buf_recvFoxW[offsetNS],buf_recvFoxE[offsetNS]; float buf_sendRabbitN[offsetWE],buf_sendRabbitS[offsetWE],buf_sendRabbitW[offsetNS],buf_sendRabbitE[offsetNS]; float buf_recvRabbitN[offsetWE],buf_recvRabbitS[offsetWE],buf_recvRabbitW[offsetNS],buf_recvRabbitE[offsetNS]; float Rabbit[offsetNS+2][offsetWE+2]; float Fox[offsetNS+2][offsetWE+2]; /* The next two arrays are used in function Evolve() to compute * the next generation of rabbits and foxes.
*/ float TRabbit[offsetNS+2][offsetWE+2]; float TFox[offsetNS+2][offsetWE+2]; //Set up the persistent communications //********* Foxes ************** //Send //Up and down MPI_Send_init(&buf_sendFoxN[0], offsetWE, MPI_FLOAT, nbrs[UP], 0, cartcomm, &reqSF[UP]); MPI_Send_init(&buf_sendFoxS[0], offsetWE, MPI_FLOAT, nbrs[DOWN], 0, cartcomm, &reqSF[DOWN]); //Left and right MPI_Send_init(&buf_sendFoxW[0], offsetNS, MPI_FLOAT, nbrs[LEFT], 0, cartcomm, &reqSF[LEFT]); MPI_Send_init(&buf_sendFoxE[0], offsetNS, MPI_FLOAT, nbrs[RIGHT], 0, cartcomm, &reqSF[RIGHT]); //Receive //Up and down MPI_Recv_init(&buf_recvFoxS[0], offsetWE, MPI_FLOAT, nbrs[DOWN], 0, cartcomm, &reqRF[DOWN]); MPI_Recv_init(&buf_recvFoxN[0], offsetWE, MPI_FLOAT, nbrs[UP], 0, cartcomm, &reqRF[UP]); //Left and right MPI_Recv_init(&buf_recvFoxE[0], offsetNS, MPI_FLOAT, nbrs[RIGHT], 0, cartcomm, &reqRF[RIGHT]); MPI_Recv_init(&buf_recvFoxW[0], offsetNS, MPI_FLOAT, nbrs[LEFT], 0, cartcomm, &reqRF[LEFT]); //********* Rabbits *************** //Send //Up and down MPI_Send_init(&buf_sendRabbitN[0], offsetWE, MPI_FLOAT, nbrs[UP], 0, cartcomm, &reqSR[UP]); MPI_Send_init(&buf_sendRabbitS[0], offsetWE, MPI_FLOAT, nbrs[DOWN], 0, cartcomm, &reqSR[DOWN]); //Left and right MPI_Send_init(&buf_sendRabbitW[0], offsetNS, MPI_FLOAT, nbrs[LEFT], 0, cartcomm, &reqSR[LEFT]); MPI_Send_init(&buf_sendRabbitE[0], offsetNS, MPI_FLOAT, nbrs[RIGHT], 0, cartcomm, &reqSR[RIGHT]); //Receive //Up and down MPI_Recv_init(&buf_recvRabbitS[0], offsetWE, MPI_FLOAT, nbrs[DOWN], 0, cartcomm, &reqRR[DOWN]); MPI_Recv_init(&buf_recvRabbitN[0], offsetWE, MPI_FLOAT, nbrs[UP], 0, cartcomm, &reqRR[UP]); //Left and right MPI_Recv_init(&buf_recvRabbitE[0], offsetNS, MPI_FLOAT, nbrs[RIGHT], 0, cartcomm, &reqRR[RIGHT]); MPI_Recv_init(&buf_recvRabbitW[0], offsetNS, MPI_FLOAT, nbrs[LEFT], 0, cartcomm, &reqRR[LEFT]); /* Initialise the problem. */ err = SetLand(Rabbit,Fox,model,landNS, landWE); // Iterate.
for( k=1; k<=NITER; k++) { /****************************************************** **** Start of halo-update communication ******** ******************************************************/ //************** Sends ***************/ //Foxes //Up and down for(i=1; i <= offsetWE; i++) buf_sendFoxN[i-1] = Fox[1][i]; MPI_Start(&reqSF[UP]); for(i=1; i <= offsetWE; i++) buf_sendFoxS[i-1] = Fox[offsetNS][i]; MPI_Start(&reqSF[DOWN]); //Left and right for(i=1; i <= offsetNS; i++) buf_sendFoxW[i-1] = Fox[i][1]; MPI_Start(&reqSF[LEFT]); for(i=1; i <= offsetNS; i++) buf_sendFoxE[i-1] = Fox[i][offsetWE]; MPI_Start(&reqSF[RIGHT]); //Rabbits //Up and down for(i=1; i <= offsetWE; i++) buf_sendRabbitN[i-1] = Rabbit[1][i]; MPI_Start(&reqSR[UP]); for(i=1; i <= offsetWE; i++) buf_sendRabbitS[i-1] = Rabbit[offsetNS][i]; MPI_Start(&reqSR[DOWN]); //Left and right for(i=1; i <= offsetNS; i++) buf_sendRabbitW[i-1] = Rabbit[i][1]; MPI_Start(&reqSR[LEFT]); for(i=1; i <= offsetNS; i++) buf_sendRabbitE[i-1] = Rabbit[i][offsetWE]; MPI_Start(&reqSR[RIGHT]); //************** Receives ***************/ //Foxes //Up and down MPI_Start(&reqRF[DOWN]); MPI_Start(&reqRF[UP]); //Left and right MPI_Start(&reqRF[RIGHT]); MPI_Start(&reqRF[LEFT]); //Rabbits //Up and down MPI_Start(&reqRR[DOWN]); MPI_Start(&reqRR[UP]); //Left and right MPI_Start(&reqRR[RIGHT]); MPI_Start(&reqRR[LEFT]); //Wait for the receives and apply the changes to the local grids //Foxes MPI_Waitall(4, reqRF, statRF); for(i=1; i <= offsetWE; i++) Fox[offsetNS+1][i] = buf_recvFoxS[i-1]; for(i=1; i <= offsetWE; i++) Fox[0][i] = buf_recvFoxN[i-1]; for(i=1; i <= offsetNS; i++) Fox[i][offsetWE+1] = buf_recvFoxE[i-1]; for(i=1; i <= offsetNS; i++) Fox[i][0] = buf_recvFoxW[i-1]; //Rabbits MPI_Waitall(4, reqRR, statRR); for(i=1; i <= offsetWE; i++) Rabbit[offsetNS+1][i] = buf_recvRabbitS[i-1]; for(i=1; i <= offsetWE; i++) Rabbit[0][i] = buf_recvRabbitN[i-1]; for(i=1; i <= offsetNS; i++) Rabbit[i][offsetWE+1] = buf_recvRabbitE[i-1]; for(i=1; i <= offsetNS; i++) Rabbit[i][0] = buf_recvRabbitW[i-1]; /****************************************************** **** End of halo-update communication ******** ******************************************************/ err = Evolve(Rabbit,Fox,TRabbit,TFox,model); if( !(k%PERIOD) ) { err = GetPopulation(Rabbit,&nbrab); err = GetPopulation(Fox,&nbfox); MPI_Reduce(&nbrab, &sumRabb, 1, MPI_FLOAT, MPI_SUM, 0, cartcomm); MPI_Reduce(&nbfox, &sumFox, 1, MPI_FLOAT, MPI_SUM, 0, cartcomm); //if(rank==0) // printf("Year %d: %.0f rabbits and %.0f foxes\n", k, sumRabb, sumFox); } //Wait for the sends to complete so we are sure the buffers can be reused //(probably not strictly required) MPI_Waitall(4, reqSR , statSR); MPI_Waitall(4, reqSF , statSF); } if(rank==0) printf("Year %d: %.0f rabbits and %.0f foxes\n", k, sumRabb, sumFox); } if(rank==0) printf("Time: %f\n",MPI_Wtime()-time); MPI_Finalize(); return 0; }
FORT_DLL_SPEC void FORT_CALL mpi_send_init_ ( void*v1, MPI_Fint *v2, MPI_Fint *v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *v7, MPI_Fint *ierr ){ *ierr = MPI_Send_init( v1, (int)*v2, (MPI_Datatype)(*v3), (int)*v4, (int)*v5, (MPI_Comm)(*v6), (MPI_Request *)(v7) ); }
static void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, void *dev_array_addr, int *lwidths, int *uwidths){ //printf("desc=%p, tardim=%d, lw=%d, uw=%d, devp=%p\n", adesc, target_dim, lwidth, uwidth, dev_array_addr); if (lwidth == 0 && uwidth == 0) return; _XMP_array_info_t *ai = &(adesc->info[target_dim]); _XMP_array_info_t *ainfo = adesc->info; _XMP_ASSERT(ai->align_manner == _XMP_N_ALIGN_BLOCK); _XMP_ASSERT(ai->is_shadow_comm_member); if (lwidth > ai->shadow_size_lo || uwidth > ai->shadow_size_hi){ _XMP_fatal("reflect width is larger than shadow width."); } _XMP_reflect_sched_t *reflect = ai->reflect_acc_sched; int target_tdim = ai->align_template_index; _XMP_nodes_info_t *ni = adesc->align_template->chunk[target_tdim].onto_nodes_info; int ndims = adesc->dim; // 0-origin int my_pos = ni->rank; int lb_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_lower); int ub_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_upper); int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1; int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1; MPI_Comm *comm = adesc->align_template->onto_nodes->comm; int my_rank = adesc->align_template->onto_nodes->comm_rank; int lo_rank = my_rank + (lo_pos - my_pos) * ni->multiplier; int hi_rank = my_rank + (hi_pos - my_pos) * ni->multiplier; int type_size = adesc->type_size; //void *array_addr = adesc->array_addr_p; void *lo_send_array = NULL; void *lo_recv_array = NULL; void *hi_send_array = NULL; void *hi_recv_array = NULL; void *lo_send_dev_buf = NULL; void *lo_recv_dev_buf = NULL; void *hi_send_dev_buf = NULL; void *hi_recv_dev_buf = NULL; void *lo_send_host_buf = NULL; void *lo_recv_host_buf = NULL; void *hi_send_host_buf = NULL; void *hi_recv_host_buf = NULL; void *mpi_lo_send_buf = NULL; void *mpi_lo_recv_buf = NULL; void *mpi_hi_send_buf = NULL; void *mpi_hi_recv_buf = NULL; int lo_buf_size = 0; int hi_buf_size = 0; // // setup data_type // int count = 0, blocklength = 0; long long stride = 0; // int count_offset = 0; if (_XMPF_running && !_XMPC_running){ /* for XMP/F */ count = 1; blocklength = type_size; stride = ainfo[0].alloc_size * type_size; for (int i = ndims - 2; i >= target_dim; i--){ count *= ainfo[i+1].alloc_size; } for (int i = 1; i <= target_dim; i++){ blocklength *= ainfo[i-1].alloc_size; stride *= ainfo[i].alloc_size; } } else if (!_XMPF_running && _XMPC_running){ /* for XMP/C */ count = 1; blocklength = type_size; stride = ainfo[ndims-1].alloc_size * type_size; /* if(target_dim > 0){ */ /* count *= ainfo[0].par_size; */ /* count_offset = ainfo[0].shadow_size_lo; */ /* } */ /* for (int i = 1; i < target_dim; i++){ */ /* count *= ainfo[i].alloc_size; */ /* } */ /* for (int i = ndims - 2; i >= target_dim; i--){ */ /* blocklength *= ainfo[i+1].alloc_size; */ /* stride *= ainfo[i].alloc_size; */ /* } */ if(target_dim == 0){ count *= 1; if(ndims >= 2){ blocklength *= (ainfo[1].par_size + lwidths[1] + uwidths[1]); } }else{ count *= (ainfo[0].par_size + lwidths[0] + uwidths[0]); for(int i = 1; i < target_dim; i++){ count *= ainfo[i].alloc_size; } blocklength *= ainfo[target_dim+1].alloc_size; stride *= ainfo[target_dim].alloc_size; } for(int i = target_dim+2; i < ndims; i++){ blocklength *= ainfo[i].alloc_size; } for(int i = target_dim+1 ; i < ndims - 1; i++){ stride *= ainfo[i].alloc_size; } /* mod_4 */ count = 1; blocklength = 1; stride = 1; for(int i = 0; i < ndims; i++){ int fact = (i == target_dim)? 
1 : (ainfo[i].par_size + lwidths[i] + uwidths[i]); int alloc_size = ainfo[i].alloc_size; if(blocklength == 1 || fact == alloc_size){ blocklength *= fact; stride *= alloc_size; }else if(count == 1 && target_dim != 0){ //to be contiguous if target_dim==0 count = blocklength; blocklength = fact; stride = alloc_size; }else{ blocklength *= alloc_size; stride *= alloc_size; } //printf("tar=%d, i=%d, fact=%d, allocsize=%d, (%d,%d,%lld)\n", target_dim, i, fact, alloc_size, count , blocklength, stride); } blocklength *= type_size; stride *= type_size; /* mod_4 end */ /* it used at 150717 for (int i = 1; i <= target_dim; i++){ count *= ainfo[i-1].alloc_size; } for (int i = ndims - 2; i >= target_dim; i--){ blocklength *= ainfo[i+1].alloc_size; stride *= ainfo[i].alloc_size; } */ /* for (int i = target_dim + 1; i < ndims; i++){ */ /* blocklength *= ainfo[i].alloc_size; */ /* } */ /* for (int i = target_dim; i < ndims - 1; i++){ */ /* stride *= ainfo[i].alloc_size; */ /* } */ // printf("count =%d, blength=%d, stride=%lld\n", count ,blocklength, stride); // printf("ainfo[0].par_size=%d\n", ainfo[0].par_size); // printf("count_ofset=%d,\n", count_offset); } else { _XMP_fatal("cannot determin the base language."); } // // calculate base address // // for lower reflect if (lwidth){ lo_send_array = lo_recv_array = (void *)((char*)dev_array_addr + /*count_offset*/0 * stride); for (int i = 0; i < ndims; i++) { int lb_send, lb_recv; unsigned long long dim_acc; if (i == target_dim) { //printf("ainfo[%d].local_upper=%d\n",i,ainfo[i].local_upper); lb_send = ainfo[i].local_upper - lwidth + 1; lb_recv = ainfo[i].shadow_size_lo - lwidth; ////ainfo[i].local_lower - lwidth; } else { // Note: including shadow area lb_send = 0; //// ainfo[i].local_lower - ainfo[i].shadow_size_lo; lb_recv = 0; //// ainfo[i].local_lower - ainfo[i].shadow_size_lo; } dim_acc = ainfo[i].dim_acc; lo_send_array = (void *)((char *)lo_send_array + lb_send * dim_acc * type_size); lo_recv_array = (void *)((char *)lo_recv_array + lb_recv * dim_acc * type_size); } } // for upper reflect if (uwidth){ hi_send_array = hi_recv_array = (void *)((char*)dev_array_addr + /*count_offset*/0 * stride); for (int i = 0; i < ndims; i++) { int lb_send, lb_recv; unsigned long long dim_acc; if (i == target_dim) { lb_send = ainfo[i].local_lower; lb_recv = ainfo[i].local_upper + 1; } else { // Note: including shadow area lb_send = 0; //ainfo[i].local_lower - ainfo[i].shadow_size_lo; lb_recv = 0; //ainfo[i].local_lower - ainfo[i].shadow_size_lo; } dim_acc = ainfo[i].dim_acc; hi_send_array = (void *)((char *)hi_send_array + lb_send * dim_acc * type_size); hi_recv_array = (void *)((char *)hi_recv_array + lb_recv * dim_acc * type_size); } } // for lower reflect if (reflect->datatype_lo != MPI_DATATYPE_NULL){ MPI_Type_free(&reflect->datatype_lo); } if(packVector || count == 1){ MPI_Type_contiguous(blocklength * lwidth * count, MPI_BYTE, &reflect->datatype_lo); // MPI_Type_contiguous(blocklength * lwidth * count / type_size, MPI_FLOAT, &reflect->datatype_lo); fprintf(stderr, "dim=%d, send elements lo = %d\n", target_dim, blocklength * lwidth * count / type_size); //fprintf(stderr, "useHostBuf=%c , packVector=%c\n", useHostBuffer, packVector); // if(useHostBuffer){ fprintf(stderr,"using host buffer\n"); } // if(packVector){ fprintf(stderr, "using pack vector\n"); } }else{ MPI_Type_vector(count, blocklength * lwidth, stride, MPI_BYTE, &reflect->datatype_lo); } MPI_Type_commit(&reflect->datatype_lo); // for upper reflect if (reflect->datatype_hi != MPI_DATATYPE_NULL){ 
MPI_Type_free(&reflect->datatype_hi); } if(packVector || count == 1){ MPI_Type_contiguous(blocklength * uwidth * count, MPI_BYTE, &reflect->datatype_hi); // MPI_Type_contiguous(blocklength * uwidth * count / type_size, MPI_FLOAT, &reflect->datatype_hi); fprintf(stderr, "dim=%d, send elements hi = %d\n", target_dim, blocklength * uwidth * count / type_size); }else{ MPI_Type_vector(count, blocklength * uwidth, stride, MPI_BYTE, &reflect->datatype_hi); } MPI_Type_commit(&reflect->datatype_hi); // // Allocate buffers // if(useHostBuffer){ CUDA_SAFE_CALL(cudaFreeHost(reflect->lo_send_host_buf)); CUDA_SAFE_CALL(cudaFreeHost(reflect->lo_recv_host_buf)); } if ((_XMPF_running && target_dim != ndims - 1) || (_XMPC_running && target_dim != 0)){ if(packVector){ CUDA_SAFE_CALL(cudaFree(reflect->lo_send_buf)); CUDA_SAFE_CALL(cudaFree(reflect->lo_recv_buf)); } } if ((_XMPF_running && target_dim == ndims - 1) || (_XMPC_running && target_dim == 0)){ // } // for lower reflect if (lwidth){ lo_buf_size = lwidth * blocklength * count; hi_buf_size = uwidth * blocklength * count; if ((_XMPF_running && target_dim == ndims - 1) || (_XMPC_running && target_dim == 0)){ lo_send_dev_buf = lo_send_array; lo_recv_dev_buf = lo_recv_array; hi_send_dev_buf = hi_send_array; hi_recv_dev_buf = hi_recv_array; } else { _XMP_TSTART(t0); if(packVector){ CUDA_SAFE_CALL(cudaMalloc((void **)&lo_send_dev_buf, lo_buf_size + hi_buf_size)); hi_send_dev_buf = (char*)lo_send_dev_buf + lo_buf_size; CUDA_SAFE_CALL(cudaMalloc((void **)&lo_recv_dev_buf, lo_buf_size + hi_buf_size)); hi_recv_dev_buf = (char*)lo_recv_dev_buf + lo_buf_size; }else{ lo_send_dev_buf = lo_send_array; lo_recv_dev_buf = lo_recv_array; hi_send_dev_buf = hi_send_array; hi_recv_dev_buf = hi_recv_array; } _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0); } if(useHostBuffer){ CUDA_SAFE_CALL(cudaMallocHost((void**)&lo_send_host_buf, lo_buf_size + hi_buf_size)); hi_send_host_buf = (char*)lo_send_host_buf + lo_buf_size; CUDA_SAFE_CALL(cudaMallocHost((void**)&lo_recv_host_buf, lo_buf_size + hi_buf_size)); hi_recv_host_buf = (char*)lo_recv_host_buf + lo_buf_size; mpi_lo_send_buf = lo_send_host_buf; mpi_lo_recv_buf = lo_recv_host_buf; mpi_hi_send_buf = hi_send_host_buf; mpi_hi_recv_buf = hi_recv_host_buf; }else{ mpi_lo_send_buf = lo_send_dev_buf; mpi_lo_recv_buf = lo_recv_dev_buf; mpi_hi_send_buf = hi_send_dev_buf; mpi_hi_recv_buf = hi_recv_dev_buf; } } // for upper reflect // // initialize communication // int src, dst; if (!is_periodic && my_pos == lb_pos){ // no periodic lo_rank = MPI_PROC_NULL; } if (!is_periodic && my_pos == ub_pos){ // no periodic hi_rank = MPI_PROC_NULL; } // for lower shadow if (lwidth){ src = lo_rank; dst = hi_rank; } else { src = MPI_PROC_NULL; dst = MPI_PROC_NULL; } // fprintf(stderr, "dim=%d, lo_src=%d, lo_dst=%d\n", target_dim, src, dst); if (reflect->req[0] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[0]); } if (reflect->req[1] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[1]); } MPI_Recv_init(mpi_lo_recv_buf, 1, reflect->datatype_lo, src, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[0]); MPI_Send_init(mpi_lo_send_buf, 1, reflect->datatype_lo, dst, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[1]); // for upper shadow if (uwidth){ src = hi_rank; dst = lo_rank; } else { src = MPI_PROC_NULL; dst = MPI_PROC_NULL; } // fprintf(stderr, "dim=%d, hi_src=%d, hi_dst=%d\n", target_dim, src, dst); if (reflect->req[2] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[2]); } if (reflect->req[3] != MPI_REQUEST_NULL){ 
MPI_Request_free(&reflect->req[3]); } MPI_Recv_init(mpi_hi_recv_buf, 1, reflect->datatype_hi, src, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[2]); MPI_Send_init(mpi_hi_send_buf, 1, reflect->datatype_hi, dst, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[3]); // // cache schedule // reflect->count = count; reflect->blocklength = blocklength; reflect->stride = stride; reflect->lo_send_array = lo_send_array; reflect->lo_recv_array = lo_recv_array; reflect->hi_send_array = hi_send_array; reflect->hi_recv_array = hi_recv_array; if(packVector){ reflect->lo_send_buf = lo_send_dev_buf; reflect->lo_recv_buf = lo_recv_dev_buf; reflect->hi_send_buf = hi_send_dev_buf; reflect->hi_recv_buf = hi_recv_dev_buf; } if(useHostBuffer){ reflect->lo_send_host_buf = lo_send_host_buf; reflect->lo_recv_host_buf = lo_recv_host_buf; reflect->hi_send_host_buf = hi_send_host_buf; reflect->hi_recv_host_buf = hi_recv_host_buf; } reflect->lo_rank = lo_rank; reflect->hi_rank = hi_rank; // gpu async reflect->lo_async_id = _XMP_alloc(sizeof(cudaStream_t)); CUDA_SAFE_CALL(cudaStreamCreate(reflect->lo_async_id)); if(target_dim != 0 && (!useHostBuffer || (lo_rank != MPI_PROC_NULL && hi_rank != MPI_PROC_NULL && (lo_buf_size / type_size) <= useSingleStreamLimit)) ){ reflect->hi_async_id = NULL; }else{ cudaStream_t *hi_stream = (cudaStream_t*)_XMP_alloc(sizeof(cudaStream_t)); CUDA_SAFE_CALL(cudaStreamCreate(hi_stream)); reflect->hi_async_id = (void*)hi_stream; } reflect->event = _XMP_alloc(sizeof(cudaEvent_t)); CUDA_SAFE_CALL(cudaEventCreateWithFlags(reflect->event, cudaEventDisableTiming)); }
static void test_pair (void) { int prev, next, count, tag, index, i, outcount, indices[2]; int rank, size, flag, ierr, reqcount; double send_buf[TEST_SIZE], recv_buf[TEST_SIZE]; double buffered_send_buf[TEST_SIZE * 2 + MPI_BSEND_OVERHEAD]; /* factor of two is based on guessing - only dynamic allocation would be safe */ void *buffer; MPI_Status statuses[2]; MPI_Status status; MPI_Request requests[2]; MPI_Comm dupcom, intercom; #ifdef V_T struct _VT_FuncFrameHandle { char *name; int func; int frame; }; typedef struct _VT_FuncFrameHandle VT_FuncFrameHandle_t; VT_FuncFrameHandle_t normal_sends, buffered_sends, buffered_persistent_sends, ready_sends, sync_sends, nblock_sends, nblock_rsends, nblock_ssends, pers_sends, pers_rsends, pers_ssends, sendrecv, sendrecv_repl, intercomm; int classid; VT_classdef( "Application:test_pair", &classid ); #define VT_REGION_DEF( _name, _nameframe, _class ) \ (_nameframe).name=_name; \ VT_funcdef( (_nameframe).name, _class, &((_nameframe).func) ); #define VT_BEGIN_REGION( _nameframe ) \ LOCDEF(); \ VT_begin( (_nameframe).func ) #define VT_END_REGION( _nameframe ) \ LOCDEF(); VT_end( (_nameframe).func ) #else #define VT_REGION_DEF( _name, _nameframe, _class ) #define VT_BEGIN_REGION( _nameframe ) #define VT_END_REGION( _nameframe ) #endif ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); ierr = MPI_Comm_size(MPI_COMM_WORLD, &size); if ( size < 2 ) { if ( rank == 0 ) { printf("Program needs to be run on at least 2 processes.\n"); } ierr = MPI_Abort( MPI_COMM_WORLD, 66 ); } ierr = MPI_Comm_dup(MPI_COMM_WORLD, &dupcom); if ( rank >= 2 ) { /* printf( "%d Calling finalize.\n", rank ); */ ierr = MPI_Finalize( ); exit(0); } next = rank + 1; if (next >= 2) next = 0; prev = rank - 1; if (prev < 0) prev = 1; VT_REGION_DEF( "Normal_Sends", normal_sends, classid ); VT_REGION_DEF( "Buffered_Sends", buffered_sends, classid ); VT_REGION_DEF( "Buffered_Persistent_Sends", buffered_persistent_sends, classid ); VT_REGION_DEF( "Ready_Sends", ready_sends, classid ); VT_REGION_DEF( "Sync_Sends", sync_sends, classid ); VT_REGION_DEF( "nblock_Sends", nblock_sends, classid ); VT_REGION_DEF( "nblock_RSends", nblock_rsends, classid ); VT_REGION_DEF( "nblock_SSends", nblock_ssends, classid ); VT_REGION_DEF( "Pers_Sends", pers_sends, classid ); VT_REGION_DEF( "Pers_RSends", pers_rsends, classid ); VT_REGION_DEF( "Pers_SSends", pers_ssends, classid ); VT_REGION_DEF( "SendRecv", sendrecv, classid ); VT_REGION_DEF( "SendRevc_Repl", sendrecv_repl, classid ); VT_REGION_DEF( "InterComm", intercomm, classid ); /* * Normal sends */ VT_BEGIN_REGION( normal_sends ); if (rank == 0) printf ("Send\n"); tag = 0x100; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Send(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( normal_sends ); /* * Buffered sends */ VT_BEGIN_REGION( buffered_sends ); if (rank == 0) printf ("Buffered Send\n"); tag = 138; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { 
init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Buffer_attach(buffered_send_buf, sizeof(buffered_send_buf)); MPI_Bsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Buffer_detach(&buffer, &size); if(buffer != buffered_send_buf || size != sizeof(buffered_send_buf)) { printf ("[%d] Unexpected buffer returned by MPI_Buffer_detach(): %p/%d != %p/%d\n", rank, buffer, size, buffered_send_buf, (int)sizeof(buffered_send_buf)); MPI_Abort(MPI_COMM_WORLD, 201); } MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( buffered_sends ); /* * Buffered sends */ VT_BEGIN_REGION( buffered_persistent_sends ); if (rank == 0) printf ("Buffered Persistent Send\n"); tag = 238; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Buffer_attach(buffered_send_buf, sizeof(buffered_send_buf)); MPI_Bsend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Start(requests); MPI_Wait(requests, statuses); MPI_Request_free(requests); MPI_Buffer_detach(&buffer, &size); if(buffer != buffered_send_buf || size != sizeof(buffered_send_buf)) { printf ("[%d] Unexpected buffer returned by MPI_Buffer_detach(): %p/%d != %p/%d\n", rank, buffer, size, buffered_send_buf, (int)sizeof(buffered_send_buf)); MPI_Abort(MPI_COMM_WORLD, 201); } MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( buffered_persistent_sends ); /* * Ready sends. Note that we must insure that the receive is posted * before the rsend; this requires using Irecv. 
*/ VT_BEGIN_REGION( ready_sends ); if (rank == 0) printf ("Rsend\n"); tag = 1456; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Recv(MPI_BOTTOM, 0, MPI_INT, next, tag, MPI_COMM_WORLD, &status); MPI_Rsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Probe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status); if (status.MPI_SOURCE != prev) printf ("Incorrect src, expected %d, got %d\n",prev, status.MPI_SOURCE); if (status.MPI_TAG != tag) printf ("Incorrect tag, expected %d, got %d\n",tag, status.MPI_TAG); MPI_Get_count(&status, MPI_DOUBLE, &i); if (i != count) printf ("Incorrect count, expected %d, got %d\n",count,i); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "rsend and recv"); } else { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, tag, MPI_COMM_WORLD); MPI_Wait(requests, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "rsend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( ready_sends ); /* * Synchronous sends */ VT_BEGIN_REGION( sync_sends ); if (rank == 0) printf ("Ssend\n"); tag = 1789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &flag, &status); if (flag) printf ("Iprobe succeeded! source %d, tag %d\n",status.MPI_SOURCE, status.MPI_TAG); MPI_Ssend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); while (!flag) MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &flag, &status); if (status.MPI_SOURCE != prev) printf ("Incorrect src, expected %d, got %d\n",prev, status.MPI_SOURCE); if (status.MPI_TAG != tag) printf ("Incorrect tag, expected %d, got %d\n",tag, status.MPI_TAG); MPI_Get_count(&status, MPI_DOUBLE, &i); if (i != count) printf ("Incorrect count, expected %d, got %d\n",count,i); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "ssend and recv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "ssend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Ssend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sync_sends ); /* * Nonblocking normal sends */ VT_BEGIN_REGION( nblock_sends ); if (rank == 0) printf ("Isend\n"); tag = 2123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); init_test_data(send_buf,TEST_SIZE,0); MPI_Isend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); MPI_Waitall(2, requests, statuses); rq_check( requests, 2, "isend and irecv" ); msg_check(recv_buf,prev,tag,count,statuses, TEST_SIZE,"isend and irecv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf,prev,tag,count,&status, TEST_SIZE,"isend and irecv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Isend(recv_buf, count, MPI_DOUBLE, next, tag,MPI_COMM_WORLD, (requests)); MPI_Wait((requests), &status); rq_check(requests, 1, "isend (and 
recv)"); } VT_END_REGION( nblock_sends ); /* * Nonblocking ready sends */ VT_BEGIN_REGION( nblock_rsends ); if (rank == 0) printf ("Irsend\n"); tag = 2456; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); init_test_data(send_buf,TEST_SIZE,0); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, next, 0, MPI_BOTTOM, 0, MPI_INT, next, 0, dupcom, &status); MPI_Irsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); reqcount = 0; while (reqcount != 2) { MPI_Waitany( 2, requests, &index, statuses); if( index == 0 ) { memcpy( &status, statuses, sizeof(status) ); } reqcount++; } rq_check( requests, 1, "irsend and irecv"); msg_check(recv_buf,prev,tag,count,&status, TEST_SIZE,"irsend and irecv"); } else { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, next, 0, MPI_BOTTOM, 0, MPI_INT, next, 0, dupcom, &status); flag = 0; while (!flag) MPI_Test(requests, &flag, &status); rq_check( requests, 1, "irsend and irecv (test)"); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "irsend and irecv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Irsend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Waitall(1, requests, statuses); rq_check( requests, 1, "irsend and irecv"); } VT_END_REGION( nblock_rsends ); /* * Nonblocking synchronous sends */ VT_BEGIN_REGION( nblock_ssends ); if (rank == 0) printf ("Issend\n"); tag = 2789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests ); init_test_data(send_buf,TEST_SIZE,0); MPI_Issend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); flag = 0; while (!flag) MPI_Testall(2, requests, &flag, statuses); rq_check( requests, 2, "issend and irecv (testall)"); msg_check( recv_buf, prev, tag, count, statuses, TEST_SIZE, "issend and recv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "issend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Issend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD,requests); flag = 0; while (!flag) MPI_Testany(1, requests, &index, &flag, statuses); rq_check( requests, 1, "issend and recv (testany)"); } VT_END_REGION( nblock_ssends ); /* * Persistent normal sends */ VT_BEGIN_REGION( pers_sends ); if (rank == 0) printf ("Send_init\n"); tag = 3123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); MPI_Send_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, (requests+1)); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Startall(2, requests); MPI_Waitall(2, requests, statuses); msg_check( recv_buf, prev, tag, count, (statuses+1), TEST_SIZE, "persistent send/recv"); } else { MPI_Start((requests+1)); MPI_Wait((requests+1), &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "persistent send/recv"); init_test_data(send_buf,TEST_SIZE,1); MPI_Start(requests); MPI_Wait(requests, &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_sends ); /* * Persistent ready sends */ VT_BEGIN_REGION( pers_rsends ); if (rank == 0) printf ("Rsend_init\n"); tag = 3456; 
count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); MPI_Rsend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, (requests+1)); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Barrier( MPI_COMM_WORLD ); MPI_Startall(2, requests); reqcount = 0; while (reqcount != 2) { MPI_Waitsome(2, requests, &outcount, indices, statuses); for (i=0; i<outcount; i++) { if (indices[i] == 1) { msg_check( recv_buf, prev, tag, count, (statuses+i), TEST_SIZE, "waitsome"); } reqcount++; } } } else { MPI_Start((requests+1)); MPI_Barrier( MPI_COMM_WORLD ); flag = 0; while (!flag) MPI_Test((requests+1), &flag, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "test"); init_test_data(send_buf,TEST_SIZE,1); MPI_Start(requests); MPI_Wait(requests, &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_rsends ); /* * Persistent synchronous sends */ VT_BEGIN_REGION( pers_ssends ); if (rank == 0) printf ("Ssend_init\n"); tag = 3789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); MPI_Ssend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Startall(2, requests); reqcount = 0; while (reqcount != 2) { MPI_Testsome(2, requests, &outcount, indices, statuses); for (i=0; i<outcount; i++) { if (indices[i] == 0) { msg_check( recv_buf, prev, tag, count, (statuses+i), TEST_SIZE, "testsome"); } reqcount++; } } } else { MPI_Start(requests); flag = 0; while (!flag) MPI_Testany(1, requests, &index, &flag, statuses); msg_check( recv_buf, prev, tag, count, statuses, TEST_SIZE, "testany" ); init_test_data(send_buf,TEST_SIZE,1); MPI_Start((requests+1)); MPI_Wait((requests+1), &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_ssends ); /* * Send/receive. */ VT_BEGIN_REGION( sendrecv ); if (rank == 0) printf ("Sendrecv\n"); tag = 4123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Sendrecv(send_buf, count, MPI_DOUBLE, next, tag, recv_buf, count, MPI_DOUBLE, prev, tag, MPI_COMM_WORLD, &status ); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "sendrecv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "recv/send"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sendrecv ); #ifdef V_T VT_flush(); #endif /* * Send/receive replace. 
*/ VT_BEGIN_REGION( sendrecv_repl ); if (rank == 0) printf ("Sendrecv_replace\n"); tag = 4456; count = TEST_SIZE / 3; if (rank == 0) { init_test_data(recv_buf, TEST_SIZE,0); for (i=count; i< TEST_SIZE; i++) recv_buf[i] = 0.0; MPI_Sendrecv_replace(recv_buf, count, MPI_DOUBLE, next, tag, prev, tag, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "sendrecvreplace"); } else { clear_test_data(recv_buf,TEST_SIZE); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "recv/send for replace"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sendrecv_repl ); /* * Send/Receive via inter-communicator */ VT_BEGIN_REGION( intercomm ); MPI_Intercomm_create(MPI_COMM_SELF, 0, MPI_COMM_WORLD, next, 1, &intercom); if (rank == 0) printf ("Send via inter-communicator\n"); tag = 4018; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Send(send_buf, count, MPI_DOUBLE, 0, tag, intercom); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, intercom, &status); msg_check(recv_buf, 0, tag, count, &status, TEST_SIZE, "send and recv via inter-communicator"); } else if (rank == 1) { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, intercom, &status); msg_check( recv_buf, 0, tag, count, &status, TEST_SIZE,"send and recv via inter-communicator"); init_test_data(recv_buf,TEST_SIZE,0); MPI_Send(recv_buf, count, MPI_DOUBLE, 0, tag, intercom); } VT_END_REGION( intercomm ); MPI_Comm_free(&intercom); MPI_Comm_free(&dupcom); }
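/*
 * Illustrative sketch, not taken from any of the codes above: the pers_* regions in the
 * test exercise the persistent-request lifecycle, which always follows the same four steps:
 * MPI_Send_init/MPI_Recv_init create the requests once, MPI_Start or MPI_Startall activates
 * them for one round, a completion call (here MPI_Waitall) finishes that round, and
 * MPI_Request_free releases them when no more rounds follow. Buffer sizes, the tag, and the
 * ring neighbors below are arbitrary assumptions.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size, iter, i;
    double sbuf[8], rbuf[8];
    MPI_Request req[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int right = (rank + 1) % size;
    int left  = (rank + size - 1) % size;

    /* create the persistent requests once */
    MPI_Send_init(sbuf, 8, MPI_DOUBLE, right, 0, MPI_COMM_WORLD, &req[0]);
    MPI_Recv_init(rbuf, 8, MPI_DOUBLE, left,  0, MPI_COMM_WORLD, &req[1]);

    for (iter = 0; iter < 10; iter++) {
        for (i = 0; i < 8; i++) sbuf[i] = rank * 100.0 + iter;  /* refill before restarting */
        MPI_Startall(2, req);                                   /* reactivate both requests */
        MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    }

    /* release the (now inactive) persistent requests */
    MPI_Request_free(&req[0]);
    MPI_Request_free(&req[1]);
    if (rank == 0) printf("done\n");
    MPI_Finalize();
    return 0;
}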
int main (int argc, char *argv[]) { int numtasks, rank, len, rc; char hostname[MPI_MAX_PROCESSOR_NAME]; int buffer[10]; int buffer2[20]; int buffer3[NUMTASKS*10]; int buffer4[NUMTASKS*10 + (NUMTASKS-1)]; int displs[NUMTASKS]; int recvcounts[NUMTASKS]; int i, mpi_errno; int rank__; MPI_Status status; MPI_Request request; rc = MPI_Init(&argc,&argv); if (rc != MPI_SUCCESS) { printf("Error starting MPI program. Termination.\n"); MPI_Abort(MPI_COMM_WORLD, rc); } MPI_Comm_size(MPI_COMM_WORLD,&numtasks); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Get_processor_name(hostname, &len); if (numtasks < NUMTASKS) { if (!rank) printf("I need at least %d tasks!!!\n",NUMTASKS); MPI_Finalize(); return -1; } /* * PT2PT */ if (!rank) printf("Testing MPI_Send and MPI_Recv between 0 and 1... "); /* Sending a buffer of 10 integers to process 1 */ if (!rank) { // producer (rank == 0) for (i=0; i < 10; i++) buffer[i] = i; mpi_errno = MPI_Send(buffer,10,MPI_INT,1,1,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Send!\n"); } if (rank == 1) { // consumer (rank == 1) for (i=0; i < 10; i++) buffer[i] = -1; mpi_errno = MPI_Recv(buffer,10,MPI_INT,0,1,MPI_COMM_WORLD,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Recv!\n"); for (i=0; i < 10; i++) { if (buffer[i] != i) printf("??? buffer[%d]=%d\n",i,buffer[i]); } } if (!rank && mpi_errno == MPI_SUCCESS) { printf("OK\n"); printf("Testing MPI_Sendrecv. Send between 0 and 1. Recv between 2 and 0... "); } if (!rank) { // (rank == 0) for (i=0; i < 10; i++) // producer buffer[i] = i; for (i=0; i < 20; i++) // consumer buffer2[i] = -1; mpi_errno = MPI_Sendrecv(buffer,10,MPI_INT,1,2, buffer2,20,MPI_INT,2,3,MPI_COMM_WORLD,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Sendrecv!\n"); for (i=0; i < 20; i++) { if (buffer2[i] != 20 - i) printf("??? buffer2[%d]=%d\n",i,buffer2[i]); } } if (rank == 1) { // (rank == 1) for (i=0; i < 10; i++) // consumer buffer[i] = -1; mpi_errno = MPI_Recv(buffer,10,MPI_INT,0,2,MPI_COMM_WORLD,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Recv!\n"); for (i=0; i < 10; i++) { if (buffer[i] != i) printf("??? buffer[%d]=%d\n",i,buffer[i]); } } if (rank == 2) { // (rank == 2) for (i=0; i < 20; i++) // producer buffer2[i] = 20 - i; mpi_errno = MPI_Send(buffer2,20,MPI_INT,0,3,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Send!\n"); } if (!rank && mpi_errno == MPI_SUCCESS) { printf("OK\n"); } if (!rank){ printf("Testing MPI_Sendrecv_replace. Send between 0 and 1. Recv between 2 and 0... "); } if (!rank) { // (rank == 0) for (i=0; i < 10; i++) // producer (and consumer!) buffer[i] = i; mpi_errno = MPI_Sendrecv_replace(buffer,10,MPI_INT,1,4,2,5,MPI_COMM_WORLD,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Sendrecv!\n"); for (i=0; i < 10; i++) { if (buffer[i] != 10 - i) printf("??? buffer[%d]=%d\n",i,buffer[i]); } } if (rank == 1) { // (rank == 1) for (i=0; i < 10; i++) // consumer buffer[i] = -1; mpi_errno = MPI_Recv(buffer,10,MPI_INT,0,4,MPI_COMM_WORLD,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Recv!\n"); for (i=0; i < 10; i++) { if (buffer[i] != i) printf("??? 
buffer[%d]=%d\n",i,buffer[i]); } } if (rank == 2) { // (rank == 2) for (i=0; i < 10; i++) // producer buffer[i] = 10 - i; mpi_errno = MPI_Send(buffer,10,MPI_INT,0,5,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Send!\n"); } if (!rank && mpi_errno == MPI_SUCCESS) { printf("OK\n"); } if (!rank) { printf("Testing MPI_Irecv and MPI_Isend. Send between 0 and 1... "); } if (rank == 1) { // producer for (i=0; i < 10; i++) buffer[i] = i; mpi_errno = MPI_Isend(buffer,10,MPI_INT,0,6,MPI_COMM_WORLD,&request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Isend!\n"); sleep(2); mpi_errno = MPI_Wait(&request,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Wait!\n"); } if (rank == 0) { // consumer for (i=0; i < 10; i++) buffer[i] = -1; mpi_errno = MPI_Irecv(buffer,10,MPI_INT,1,6,MPI_COMM_WORLD,&request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Irecv!\n"); mpi_errno = MPI_Wait(&request,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Wait!\n"); for (i=0; i < 10; i++) { if (buffer[i] != i) printf("??? buffer[%d]=%d\n",i,buffer[i]); } } if (!rank && mpi_errno == MPI_SUCCESS) printf("OK\n"); if (!rank) { printf("Testing MPI_Send_init and MPI_Recv_init. 0 --> 3... "); } if (rank == 0) { // producer for (i=0; i < 10; i++) buffer[i] = i; mpi_errno = MPI_Send_init(buffer,10,MPI_INT, 3,7,MPI_COMM_WORLD, &request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Send_init!\n"); //sleep(3); mpi_errno = MPI_Start(&request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Start!\n"); mpi_errno = MPI_Wait(&request,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Wait!\n"); mpi_errno = MPI_Request_free(&request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Request_free!\n"); } if (rank == 3) { // Consumer for (i=0; i < 10; i++) buffer[i] = -1; mpi_errno = MPI_Recv_init(buffer,10,MPI_INT, 0,7,MPI_COMM_WORLD, &request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Recv_init!\n"); sleep(7); mpi_errno = MPI_Start(&request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Start!\n"); mpi_errno = MPI_Wait(&request,&status); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Wait!\n"); mpi_errno = MPI_Request_free(&request); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Request_free!\n"); } if (!rank && mpi_errno == MPI_SUCCESS) printf("OK\n"); /* * COLL */ if (!rank) printf("Testing MPI_Bcast. 4 to all processes... "); for(i=0; i < 10; i++) { if (rank == 4) { // producer buffer[i] = (int) pow((double)2,(double)i); // 2^i } else { // consumer buffer[i] = -1; } } mpi_errno = MPI_Bcast(buffer,10,MPI_INT,4,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Bcast!\n"); if (rank != 4) { for (i=0; i < 10; i++) if (buffer[i] != (int) pow((double)2,(double)i)) // 2^i printf("??? buffer[%d]=%d\n",i,buffer[i]); } if (!rank && mpi_errno == MPI_SUCCESS) printf("OK\n"); if (!rank) printf("Testing MPI_Gather. All to 5... 
"); if (rank == 5) // consumer for (i=0; i < NUMTASKS*10; i++) buffer3[i] = -1; // producer for (i=0; i < 10; i++) buffer[i] = (10*rank) + i; mpi_errno = MPI_Gather(buffer,10,MPI_INT, buffer3,10,MPI_INT,5,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Gather!\n"); if (rank == 5) { rank__ = -1; for (i=0; i < NUMTASKS*10; i++) { if (i % 10 == 0) rank__++; if (buffer3[i] != (10*rank__) + (i % 10)) printf("??? buffer3[%d]=%d vs %d\n",i,buffer3[i],(10*rank__)+(i % 10)); } } if (!rank && mpi_errno == MPI_SUCCESS) printf("OK\n"); if (!rank) printf("Testing MPI_Gatherv. All to 5... "); if (rank == 5) { // consumer for (i=0; i < NUMTASKS*10 + (NUMTASKS-1); i++) buffer4[i] = -1; for (i=0; i < NUMTASKS; i++) displs[i] = (10*i) + i; for (i=0; i < NUMTASKS; i++) recvcounts[i] = 10; } // producer for (i=0; i < 10; i++) buffer[i] = (10*rank) + (10 - i); mpi_errno = MPI_Gatherv(buffer,10,MPI_INT, buffer4,recvcounts,displs, MPI_INT,5,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Gatherv!\n"); /* if (rank == 5) { for (i=0; i < NUMTASKS*10 + (NUMTASKS-1); i++) printf("buffer4[%d]=%d\n",i,buffer4[i]); } */ if (!rank && mpi_errno == MPI_SUCCESS) printf("OK\n"); if (!rank) printf("Testing MPI_Scatter. 6 to all... "); if (rank == 6) {// producer for (i=0; i < NUMTASKS; i++) buffer[i] = i; } // consumer buffer2[0] = -1; mpi_errno = MPI_Scatter(buffer,1,MPI_INT, buffer2,1,MPI_INT,6,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Scatter!\n"); if (buffer2[0] != rank) printf("??? result=%d vs %d\n",buffer2[0],rank); if (!rank && mpi_errno==MPI_SUCCESS) printf("OK\n"); if (!rank) printf("Testing MPI_Alltoall. all to all... "); for (i=0; i < NUMTASKS; i++) { // to send buffer[i] = i; } for (i=0; i < NUMTASKS; i++) { // to recv buffer2[i] = -1; } mpi_errno = MPI_Alltoall(buffer,1,MPI_INT,buffer2,1,MPI_INT,MPI_COMM_WORLD); if (mpi_errno != MPI_SUCCESS) printf("Something went wrong in the MPI_Alltoall\n"); // all processes should have an array of size NUMTASKS with its // rank repeated all over. For example, for process 3: // buffer2[] = [3, 3, 3, ..., 3] for (i=0; i < NUMTASKS; i++) if (buffer2[i] != rank) printf("??? buffer2[%d]=%d (for tasks %d)\n",i,buffer2[i],rank); if (!rank && mpi_errno==MPI_SUCCESS) printf("OK\n"); MPI_Finalize(); return 0; }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; MPI_Comm comm = MPI_COMM_WORLD; char processor_name[128]; int namelen = 128; int bbuf[(BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES]; int buf[BUF_SIZE * 2 * NUM_SEND_TYPES]; int i, j, k, at_size, send_t_number, index, outcount, total, flag; int num_errors, error_count, indices[2 * NUM_SEND_TYPES]; MPI_Request aReq[2 * NUM_SEND_TYPES]; MPI_Status aStatus[2 * NUM_SEND_TYPES]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (comm, &nprocs); MPI_Comm_rank (comm, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Buffer_attach (bbuf, sizeof(int) * (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES); if (rank == 0) { /* set up persistent sends... */ send_t_number = NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES; MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); } for (k = 0; k < (NUM_COMPLETION_MECHANISMS * 2); k++) { if (rank == 0) { /* initialize all of the send buffers */ for (j = 0; j < NUM_SEND_TYPES; j++) { for (i = 0; i < BUF_SIZE; i++) { buf[2 * j * BUF_SIZE + i] = i; buf[((2 * j + 1) * BUF_SIZE) + i] = BUF_SIZE - 1 - i; } } } else if (rank == 1) { /* zero out all of the receive buffers */ bzero (buf, sizeof(int) * BUF_SIZE * 2 * NUM_SEND_TYPES); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* set up transient sends... */ send_t_number = 0; MPI_Isend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Isend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Ibsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Ibsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; /* Barrier to ensure receives are posted for rsends... 
*/ MPI_Barrier(MPI_COMM_WORLD); MPI_Irsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Irsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Issend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Issend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); /* just to be paranoid */ send_t_number++; assert (send_t_number == NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES); /* start the persistent sends... */ if (k % 2) { MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2, &aReq[2 * send_t_number]); } else { for (j = 0; j < NUM_PERSISTENT_SEND_TYPES * 2; j++) { MPI_Start (&aReq[2 * send_t_number + j]); } } /* NOTE: Changing the send buffer of a Bsend is NOT an error... */ for (j = 0; j < NUM_SEND_TYPES; j++) { /* muck the buffers */ buf[j * 2 * BUF_SIZE + (BUF_SIZE >> 1)] = BUF_SIZE; } printf ("USER MSG: 6 change send buffer errors in iteration #%d:\n", k); /* complete the sends */ switch (k/2) { case 0: /* use MPI_Wait */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Wait (&aReq[j], &aStatus[j]); } break; case 1: /* use MPI_Waitall */ MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus); break; case 2: /* use MPI_Waitany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus); } break; case 3: /* use MPI_Waitsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { MPI_Waitsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); total += outcount; } break; case 4: /* use MPI_Test */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Test (&aReq[j], &flag, &aStatus[j]); } } break; case 5: /* use MPI_Testall */ flag = 0; while (!flag) { MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus); } break; case 6: /* use MPI_Testany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Testany (NUM_SEND_TYPES * 2, aReq, &index, &flag, aStatus); } } break; case 7: /* use MPI_Testsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { outcount = 0; while (!outcount) { MPI_Testsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); } total += outcount; } break; default: assert (0); break; } } else if (rank == 1) {
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic, int shadow_comm_type){ if (lwidth == 0 && uwidth == 0) return; _XMP_array_info_t *ai = &(adesc->info[target_dim]); _XMP_array_info_t *ainfo = adesc->info; _XMP_ASSERT(ai->align_manner == _XMP_N_ALIGN_BLOCK); _XMP_ASSERT(ai->is_shadow_comm_member); if (lwidth > ai->shadow_size_lo || uwidth > ai->shadow_size_hi){ _XMP_fatal("reflect width is larger than shadow width."); } _XMP_reflect_sched_t *reflect = ai->reflect_sched; int target_tdim = ai->align_template_index; _XMP_nodes_info_t *ni = adesc->align_template->chunk[target_tdim].onto_nodes_info; if (ni->size == 1 && !is_periodic) return; int ndims = adesc->dim; // 0-origin int my_pos = ni->rank; int lb_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_lower); int ub_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_upper); int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1; int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1; MPI_Comm *comm = adesc->align_template->onto_nodes->comm; int my_rank = adesc->align_template->onto_nodes->comm_rank; int lo_rank = my_rank + (lo_pos - my_pos) * ni->multiplier; int hi_rank = my_rank + (hi_pos - my_pos) * ni->multiplier; int count = 0, blocklength = 0; long long stride = 0; int type_size = adesc->type_size; void *array_addr = adesc->array_addr_p; void *lo_send_array = NULL, *lo_recv_array = NULL; void *hi_send_array = NULL, *hi_recv_array = NULL; void *lo_send_buf = NULL; void *lo_recv_buf = NULL; void *hi_send_buf = NULL; void *hi_recv_buf = NULL; int lo_buf_size = 0; int hi_buf_size = 0; if (reflect->prev_pcopy_sched_type && lwidth == reflect->lo_width && uwidth == reflect->hi_width && is_periodic == reflect->is_periodic){ if ((adesc->order == MPI_ORDER_FORTRAN && target_dim != ndims - 1) || (adesc->order == MPI_ORDER_C && target_dim != 0)){ goto init_comm; } else if (reflect->prev_pcopy_sched_type != shadow_comm_type){ count = reflect->count; blocklength = reflect->blocklength; stride = reflect->stride; goto alloc_buf; } } // // setup data_type // if (adesc->order == MPI_ORDER_FORTRAN){ /* for XMP/F */ count = 1; blocklength = type_size; stride = ainfo[0].alloc_size * type_size; for (int i = ndims - 2; i >= target_dim; i--){ count *= ainfo[i+1].alloc_size; } for (int i = 1; i <= target_dim; i++){ blocklength *= ainfo[i-1].alloc_size; stride *= ainfo[i].alloc_size; } } else if (adesc->order == MPI_ORDER_C){ /* for XMP/C */ count = 1; blocklength = type_size; stride = ainfo[ndims-1].alloc_size * type_size; for (int i = 1; i <= target_dim; i++){ count *= ainfo[i-1].alloc_size; } for (int i = ndims - 2; i >= target_dim; i--){ blocklength *= ainfo[i+1].alloc_size; stride *= ainfo[i].alloc_size; } } else { _XMP_fatal("cannot determin the base language."); } // // calculate base address // alloc_buf: // for lower reflect if (lwidth){ lo_send_array = array_addr; lo_recv_array = array_addr; for (int i = 0; i < ndims; i++) { int lb_send, lb_recv; unsigned long long dim_acc; if (i == target_dim) { lb_send = ainfo[i].local_upper - lwidth + 1; lb_recv = ainfo[i].shadow_size_lo - lwidth;; } else { // Note: including shadow area lb_send = 0; lb_recv = 0; } dim_acc = ainfo[i].dim_acc; lo_send_array = (void *)((char *)lo_send_array + lb_send * dim_acc * type_size); lo_recv_array = (void *)((char *)lo_recv_array + lb_recv * dim_acc * type_size); } } // for upper reflect if (uwidth){ hi_send_array = array_addr; hi_recv_array = array_addr; for (int i = 0; i < ndims; i++) { int lb_send, lb_recv; 
unsigned long long dim_acc; if (i == target_dim) { lb_send = ainfo[i].local_lower; lb_recv = ainfo[i].local_upper + 1; } else { // Note: including shadow area lb_send = 0; lb_recv = 0; } dim_acc = ainfo[i].dim_acc; hi_send_array = (void *)((char *)hi_send_array + lb_send * dim_acc * type_size); hi_recv_array = (void *)((char *)hi_recv_array + lb_recv * dim_acc * type_size); } } // // Allocate buffers // if (reflect->prev_pcopy_sched_type == _XMP_COMM_REFLECT && ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) || (adesc->order == MPI_ORDER_C && target_dim == 0))){ ; } else { _XMP_free(reflect->lo_send_buf); _XMP_free(reflect->lo_recv_buf); _XMP_free(reflect->hi_send_buf); _XMP_free(reflect->hi_recv_buf); } // for lower reflect if (lwidth){ lo_buf_size = lwidth * blocklength * count; if (shadow_comm_type == _XMP_COMM_REFLECT && ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) || (adesc->order == MPI_ORDER_C && target_dim == 0))){ lo_send_buf = lo_send_array; lo_recv_buf = lo_recv_array; } else { _XMP_TSTART(t0); lo_send_buf = _XMP_alloc(lo_buf_size); lo_recv_buf = _XMP_alloc(lo_buf_size); _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0); } } // for upper reflect if (uwidth){ hi_buf_size = uwidth * blocklength * count; if (shadow_comm_type == _XMP_COMM_REFLECT && ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) || (adesc->order == MPI_ORDER_C && target_dim == 0))){ hi_send_buf = hi_send_array; hi_recv_buf = hi_recv_array; } else { _XMP_TSTART(t0); hi_send_buf = _XMP_alloc(hi_buf_size); hi_recv_buf = _XMP_alloc(hi_buf_size); _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0); } } // // cache schedule // reflect->count = count; reflect->blocklength = blocklength; reflect->stride = stride; reflect->lo_send_array = lo_send_array; reflect->lo_recv_array = lo_recv_array; reflect->hi_send_array = hi_send_array; reflect->hi_recv_array = hi_recv_array; reflect->lo_send_buf = lo_send_buf; reflect->lo_recv_buf = lo_recv_buf; reflect->hi_send_buf = hi_send_buf; reflect->hi_recv_buf = hi_recv_buf; // // initialize communication // int src, dst; init_comm: if (!is_periodic && my_pos == lb_pos){ // no periodic lo_rank = MPI_PROC_NULL; } if (!is_periodic && my_pos == ub_pos){ // no periodic hi_rank = MPI_PROC_NULL; } lo_buf_size = lwidth * reflect->blocklength * reflect->count; hi_buf_size = uwidth * reflect->blocklength * reflect->count; // for lower shadow if (lwidth){ src = lo_rank; dst = hi_rank; } else { src = MPI_PROC_NULL; dst = MPI_PROC_NULL; } if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){ if (reflect->req_reduce[0] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req_reduce[0]); } if (reflect->req_reduce[1] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req_reduce[1]); } MPI_Send_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[0]); MPI_Recv_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[1]); } else { if (reflect->req[0] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[0]); } if (reflect->req[1] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[1]); } MPI_Recv_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[0]); MPI_Send_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[1]); } // for upper shadow if (uwidth){ src = hi_rank; dst = lo_rank; } else { src = MPI_PROC_NULL; dst = 
MPI_PROC_NULL; } if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){ if (reflect->req_reduce[2] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req_reduce[2]); } if (reflect->req_reduce[3] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req_reduce[3]); } MPI_Send_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[2]); MPI_Recv_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[3]); } else { if (reflect->req[2] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[2]); } if (reflect->req[3] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[3]); } MPI_Recv_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[2]); MPI_Send_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[3]); } reflect->prev_pcopy_sched_type = shadow_comm_type; reflect->lo_rank = lo_rank; reflect->hi_rank = hi_rank; }
static void _XMP_reflect_normal_sched_dim(_XMP_array_t *adesc, int target_dim, int lwidth, int uwidth, int is_periodic){ if (lwidth == 0 && uwidth == 0) return; _XMP_array_info_t *ai = &(adesc->info[target_dim]); _XMP_array_info_t *ainfo = adesc->info; _XMP_ASSERT(ai->align_manner == _XMP_N_ALIGN_BLOCK); _XMP_ASSERT(ai->is_shadow_comm_member); if (lwidth > ai->shadow_size_lo || uwidth > ai->shadow_size_hi){ _XMP_fatal("reflect width is larger than shadow width."); } _XMP_reflect_sched_t *reflect = ai->reflect_sched; int target_tdim = ai->align_template_index; _XMP_nodes_info_t *ni = adesc->align_template->chunk[target_tdim].onto_nodes_info; if (ni->size == 1 && !is_periodic) return; int ndims = adesc->dim; // 0-origin int my_pos = ni->rank; int lb_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_lower); int ub_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_upper); int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1; int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1; MPI_Comm *comm = adesc->align_template->onto_nodes->comm; int my_rank = adesc->align_template->onto_nodes->comm_rank; int lo_rank = my_rank + (lo_pos - my_pos) * ni->multiplier; int hi_rank = my_rank + (hi_pos - my_pos) * ni->multiplier; int type_size = adesc->type_size; void *lo_recv_buf = adesc->array_addr_p; void *lo_send_buf = adesc->array_addr_p; void *hi_recv_buf = adesc->array_addr_p; void *hi_send_buf = adesc->array_addr_p; // // setup MPI_data_type // int count = 0, blocklength = 0; long long stride = 0; if (adesc->order == MPI_ORDER_FORTRAN){ /* for XMP/F */ count = 1; blocklength = type_size; stride = ainfo[0].alloc_size * type_size; for (int i = ndims - 2; i >= target_dim; i--){ count *= ainfo[i+1].alloc_size; } for (int i = 1; i <= target_dim; i++){ blocklength *= ainfo[i-1].alloc_size; stride *= ainfo[i].alloc_size; } } else if (adesc->order == MPI_ORDER_C){ /* for XMP/C */ count = 1; blocklength = type_size; stride = ainfo[ndims-1].alloc_size * type_size; for (int i = 1; i <= target_dim; i++){ count *= ainfo[i-1].alloc_size; } for (int i = ndims - 2; i >= target_dim; i--){ blocklength *= ainfo[i+1].alloc_size; stride *= ainfo[i].alloc_size; } } else { _XMP_fatal("cannot determin the base language."); } // for lower reflect if (reflect->datatype_lo != MPI_DATATYPE_NULL){ MPI_Type_free(&reflect->datatype_lo); } MPI_Type_vector(count, blocklength * lwidth, stride, MPI_BYTE, &reflect->datatype_lo); MPI_Type_commit(&reflect->datatype_lo); // for upper reflect if (reflect->datatype_hi != MPI_DATATYPE_NULL){ MPI_Type_free(&reflect->datatype_hi); } MPI_Type_vector(count, blocklength * uwidth, stride, MPI_BYTE, &reflect->datatype_hi); MPI_Type_commit(&reflect->datatype_hi); // // calculate base address // // for lower reflect if (lwidth){ for (int i = 0; i < ndims; i++) { int lb_send, lb_recv, dim_acc; if (i == target_dim) { lb_send = ainfo[i].local_upper - lwidth + 1; lb_recv = ainfo[i].shadow_size_lo - lwidth; } else { // Note: including shadow area lb_send = 0; lb_recv = 0; } dim_acc = ainfo[i].dim_acc; lo_send_buf = (void *)((char *)lo_send_buf + lb_send * dim_acc * type_size); lo_recv_buf = (void *)((char *)lo_recv_buf + lb_recv * dim_acc * type_size); } } // for upper reflect if (uwidth){ for (int i = 0; i < ndims; i++) { int lb_send, lb_recv, dim_acc; if (i == target_dim) { lb_send = ainfo[i].local_lower; lb_recv = ainfo[i].local_upper + 1; } else { // Note: including shadow area lb_send = 0; lb_recv = 0; } dim_acc = ainfo[i].dim_acc; hi_send_buf = (void *)((char *)hi_send_buf + lb_send * dim_acc * 
type_size); hi_recv_buf = (void *)((char *)hi_recv_buf + lb_recv * dim_acc * type_size); } } // // initialize communication // int src, dst; if (!is_periodic && my_pos == lb_pos){ // no periodic lo_rank = MPI_PROC_NULL; } if (!is_periodic && my_pos == ub_pos){ // no periodic hi_rank = MPI_PROC_NULL; } // for lower reflect if (lwidth){ src = lo_rank; dst = hi_rank; } else { src = MPI_PROC_NULL; dst = MPI_PROC_NULL; } if (reflect->req[0] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[0]); } if (reflect->req[1] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[1]); } MPI_Recv_init(lo_recv_buf, 1, reflect->datatype_lo, src, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[0]); MPI_Send_init(lo_send_buf, 1, reflect->datatype_lo, dst, _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[1]); // for upper reflect if (uwidth){ src = hi_rank; dst = lo_rank; } else { src = MPI_PROC_NULL; dst = MPI_PROC_NULL; } if (reflect->req[2] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[2]); } if (reflect->req[3] != MPI_REQUEST_NULL){ MPI_Request_free(&reflect->req[3]); } MPI_Recv_init(hi_recv_buf, 1, reflect->datatype_hi, src, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[2]); MPI_Send_init(hi_send_buf, 1, reflect->datatype_hi, dst, _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[3]); }
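/*
 * Illustrative sketch, not the XMP runtime API: the same pattern the scheduling function
 * above produces for one dimension -- a strided halo described by MPI_Type_vector, with one
 * persistent send/recv pair per direction, created once and restarted every iteration.
 * A 2D C-order array with a one-cell halo column on each side is assumed; the array shape
 * and iteration count are arbitrary.
 */
#include <mpi.h>

#define NX 8   /* local rows                                  */
#define NY 6   /* local columns, including the two halo columns */

int main(int argc, char **argv)
{
    int rank, size;
    double a[NX][NY];
    MPI_Datatype col;
    MPI_Request req[4];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    for (int i = 0; i < NX; i++)
        for (int j = 0; j < NY; j++)
            a[i][j] = rank;

    int left  = (rank == 0)        ? MPI_PROC_NULL : rank - 1;
    int right = (rank == size - 1) ? MPI_PROC_NULL : rank + 1;

    /* one column: NX elements, stride of NY doubles between consecutive elements */
    MPI_Type_vector(NX, 1, NY, MPI_DOUBLE, &col);
    MPI_Type_commit(&col);

    /* receive into the halo columns, send the adjacent interior columns */
    MPI_Recv_init(&a[0][0],      1, col, left,  0, MPI_COMM_WORLD, &req[0]);
    MPI_Send_init(&a[0][NY - 2], 1, col, right, 0, MPI_COMM_WORLD, &req[1]);
    MPI_Recv_init(&a[0][NY - 1], 1, col, right, 1, MPI_COMM_WORLD, &req[2]);
    MPI_Send_init(&a[0][1],      1, col, left,  1, MPI_COMM_WORLD, &req[3]);

    for (int iter = 0; iter < 10; iter++) {
        /* ... update the interior of a ... */
        MPI_Startall(4, req);
        MPI_Waitall(4, req, MPI_STATUSES_IGNORE);
    }

    for (int i = 0; i < 4; i++) MPI_Request_free(&req[i]);
    MPI_Type_free(&col);
    MPI_Finalize();
    return 0;
}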
static void _XMP_reflect_sched_dir(_XMP_array_t *adesc, int ishadow[], int lwidth[], int uwidth[], int is_periodic_dim[]){ int ndims = adesc->dim; _XMP_array_info_t *ainfo = adesc->info; MPI_Comm *comm = adesc->align_template->onto_nodes->comm; int my_rank = adesc->align_template->onto_nodes->comm_rank; int src = my_rank; int dst = my_rank; _XMP_async_reflect_t *async_reflect = adesc->async_reflect; MPI_Datatype *send_dtype = &async_reflect->datatype[async_reflect->nreqs]; MPI_Datatype *recv_dtype = send_dtype + 1; MPI_Request *send_req = &async_reflect->reqs[async_reflect->nreqs]; MPI_Request *recv_req = send_req + 1; int width[_XMP_N_MAX_DIM] = { 0 }; int is_periodic = 1; int at_tail = 0, at_head = 0; void *recv_buf = adesc->array_addr_p; void *send_buf = adesc->array_addr_p; // // setup neighbor nodes // for (int i = 0; i < ndims; i++){ if (ishadow[i] == 0) continue; width[i] = ishadow[i] > 0 ? uwidth[i] : lwidth[i]; is_periodic = is_periodic * is_periodic_dim[i]; _XMP_array_info_t *ai = &(adesc->info[i]); _XMP_ASSERT(ai->align_manner == _XMP_N_ALIGN_BLOCK); _XMP_ASSERT(ai->is_shadow_comm_member); if (lwidth[i] > ai->shadow_size_lo || uwidth[i] > ai->shadow_size_hi){ _XMP_fatal("reflect width is larger than shadow width."); } int tdim = ai->align_template_index; _XMP_nodes_info_t *ni = adesc->align_template->chunk[tdim].onto_nodes_info; // don't skip if no comm. is needed. //if (ni->size == 1 && !is_periodic_dim[i]) return; // 0-origin int my_pos = ni->rank; int lb_pos = _XMP_get_owner_pos(adesc, i, ai->ser_lower); int ub_pos = _XMP_get_owner_pos(adesc, i, ai->ser_upper); int src_pos; int dst_pos; if (ishadow[i] > 0){ src_pos = my_pos + 1; dst_pos = my_pos - 1; if (my_pos == lb_pos){ at_head = 1; dst_pos = ub_pos; } if (my_pos == ub_pos){ at_tail = 1; src_pos = lb_pos; } } else { //ishadow[i] < 0 src_pos = my_pos - 1; dst_pos = my_pos + 1; if (my_pos == lb_pos){ at_tail = 1; src_pos = ub_pos; } if (my_pos == ub_pos){ at_head = 1; dst_pos = lb_pos; } } src = src + (src_pos - my_pos) * ni->multiplier; dst = dst + (dst_pos - my_pos) * ni->multiplier; } src = (is_periodic || !at_tail) ? src : MPI_PROC_NULL; dst = (is_periodic || !at_head) ? dst : MPI_PROC_NULL; // // setup MPI_data_type // int sizes[_XMP_N_MAX_DIM]; int subsizes[_XMP_N_MAX_DIM]; int send_starts[_XMP_N_MAX_DIM]; int recv_starts[_XMP_N_MAX_DIM]; for (int i = 0; i < ndims; i++){ sizes[i] = ainfo[i].alloc_size; subsizes[i] = (ishadow[i] == 0) ? ainfo[i].par_size : width[i]; if (ishadow[i] == 0){ // excludes shadow area send_starts[i] = ainfo[i].shadow_size_lo; recv_starts[i] = ainfo[i].shadow_size_lo; } else if (ishadow[i] > 0){ send_starts[i] = ainfo[i].shadow_size_lo; recv_starts[i] = ainfo[i].local_upper + 1; } else { send_starts[i] = ainfo[i].local_upper - width[i] + 1; recv_starts[i] = ainfo[i].shadow_size_lo - width[i]; } } MPI_Type_create_subarray(ndims, sizes, subsizes, send_starts, adesc->order, adesc->mpi_type, send_dtype); MPI_Type_create_subarray(ndims, sizes, subsizes, recv_starts, adesc->order, adesc->mpi_type, recv_dtype); MPI_Type_commit(send_dtype); MPI_Type_commit(recv_dtype); // // initialize communication // MPI_Send_init(send_buf, 1, *send_dtype, dst, _XMP_N_MPI_TAG_REFLECT_LO, *comm, send_req); MPI_Recv_init(recv_buf, 1, *recv_dtype, src, _XMP_N_MPI_TAG_REFLECT_LO, *comm, recv_req); async_reflect->nreqs += 2; }
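/*
 * Illustrative sketch, not the XMP runtime itself: the subarray-based variant used above for
 * directional/corner shadows. MPI_Type_create_subarray describes both the block being sent and
 * the slot it is received into, so the same sizes/subsizes arrays serve for the send and
 * receive types, and one persistent send/recv pair moves a block per exchange. The array
 * shape, the single-element corner block, and the neighbor ranks are arbitrary assumptions.
 */
#include <mpi.h>

int main(int argc, char **argv)
{
    int rank, size;
    double a[10][10];                 /* local block with a 1-cell shadow on each side */
    int sizes[2]    = {10, 10};
    int subsizes[2] = {1, 1};         /* a single corner element            */
    int sstart[2]   = {1, 1};         /* interior corner to send            */
    int rstart[2]   = {0, 0};         /* shadow corner to receive into      */
    MPI_Datatype stype, rtype;
    MPI_Request req[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    for (int i = 0; i < 10; i++)
        for (int j = 0; j < 10; j++)
            a[i][j] = rank;

    /* next/previous rank as a stand-in for the diagonal neighbor */
    int dst = (rank + 1) % size;
    int src = (rank + size - 1) % size;

    MPI_Type_create_subarray(2, sizes, subsizes, sstart, MPI_ORDER_C, MPI_DOUBLE, &stype);
    MPI_Type_create_subarray(2, sizes, subsizes, rstart, MPI_ORDER_C, MPI_DOUBLE, &rtype);
    MPI_Type_commit(&stype);
    MPI_Type_commit(&rtype);

    MPI_Send_init(&a[0][0], 1, stype, dst, 0, MPI_COMM_WORLD, &req[0]);
    MPI_Recv_init(&a[0][0], 1, rtype, src, 0, MPI_COMM_WORLD, &req[1]);

    MPI_Startall(2, req);
    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);

    MPI_Request_free(&req[0]);
    MPI_Request_free(&req[1]);
    MPI_Type_free(&stype);
    MPI_Type_free(&rtype);
    MPI_Finalize();
    return 0;
}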
int main(int argc, char * argv[]) { // initialize MPI int i, rank, size, rec; int arr[100]; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); // populate a sample array, // i.e. simulated input if(rank == 0) { printf("\r\n\r\nHello from r00t. Starting\r\n\r\n"); } else { int i; srand(time(NULL) * rank); for(i=0;i<100;i++) { arr[i] = rand() % 100; } } MPI_Request sreq, rreq; MPI_Status status; if(rank > 0) { int source, dest; if(rank == 1) { source = size - 1; } else { source = rank - 1; } if(rank == size - 1) { dest = 1; } else { dest = rank + 1; } // create a persistent send and a persistent receive request MPI_Send_init(&arr[rank], 1, MPI_INT, dest, 0, MPI_COMM_WORLD, &sreq); MPI_Recv_init(&rec, 1, MPI_INT, source, 0, MPI_COMM_WORLD, &rreq); // once created we can use them over and over again... for(i=0; i<100; i++) { MPI_Start(&rreq); MPI_Start(&sreq); MPI_Wait(&rreq, &status); printf("My rank is %d and I received %d from %d\n", rank, rec, source); MPI_Wait(&sreq, &status); } // release the persistent requests once they are no longer needed MPI_Request_free(&sreq); MPI_Request_free(&rreq); } MPI_Finalize(); return 0; }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; MPI_Request aReq[2]; MPI_Status aStatus[2]; MPI_Status status; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); if (nprocs < 2) { printf ("not enough tasks\n"); } else { if (rank == 0) { memset (buf0, 0, buf_size); MPI_Send_init (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[0]); MPI_Recv_init (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[1]); MPI_Start (&aReq[0]); MPI_Start (&aReq[1]); MPI_Waitall (2, aReq, aStatus); memset (buf0, 1, buf_size); MPI_Startall (2, aReq); MPI_Waitall (2, aReq, aStatus); } else if (rank == 1) { memset (buf1, 1, buf_size); MPI_Recv_init (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[0]); MPI_Send_init (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[1]); MPI_Start (&aReq[0]); MPI_Start (&aReq[1]); MPI_Waitall (2, aReq, aStatus); memset (buf1, 0, buf_size); MPI_Startall (2, aReq); MPI_Waitall (2, aReq, aStatus); } } MPI_Barrier (MPI_COMM_WORLD); MPI_Request_free (&aReq[0]); MPI_Request_free (&aReq[1]); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
va_list ap; va_start(ap, unknown); buf = unknown; if (_numargs() == NUMPARAMS+1) { buflen = va_arg(ap, int) /8; /* This is in bits. */ } count = va_arg (ap, int *); datatype = va_arg(ap, MPI_Datatype *); dest = va_arg(ap, int *); tag = va_arg(ap, int *); comm = va_arg(ap, MPI_Comm *); request = va_arg(ap, MPI_Request *); __ierr = va_arg(ap, int *); *__ierr = MPI_Send_init(MPIR_F_PTR(buf),*count, *datatype,*dest,*tag,*comm,&lrequest); *(int*)request = MPIR_FromPointer( lrequest ); } #else void mpi_send_init_( buf, count, datatype, dest, tag, comm, request, __ierr ) void *buf; int*count; MPI_Datatype *datatype; int*dest; int*tag; MPI_Comm *comm; MPI_Request *request; int *__ierr; {
int main(int argc, char **argv) { int numtasks, rank; int rank_dst, ping_side; // Initialise MPI MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &numtasks); if (numtasks != 2) { printf("Need 2 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); exit(1); } ping_side = !(rank & 1); rank_dst = ping_side?(rank | 1) : (rank & ~1); if (ping_side) { int x=42, y; MPI_Request send_request; MPI_Request recv_request; MPI_Send_init(&x, 1, MPI_INT, rank_dst, 1, MPI_COMM_WORLD, &send_request); MPI_Start(&send_request); MPI_Wait(&send_request, MPI_STATUS_IGNORE); MPI_Start(&send_request); MPI_Wait(&send_request, MPI_STATUS_IGNORE); MPI_Recv_init(&y, 1, MPI_INT, rank_dst, 1, MPI_COMM_WORLD, &recv_request); MPI_Start(&recv_request); MPI_Wait(&recv_request, MPI_STATUS_IGNORE); if (y == 42) printf("success\n"); else printf("failure\n"); MPI_Start(&recv_request); MPI_Wait(&recv_request, MPI_STATUS_IGNORE); if (y == 42) printf("success\n"); else printf("failure\n"); } else { int x, y; MPI_Recv(&x, 1, MPI_INT, rank_dst, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(&y, 1, MPI_INT, rank_dst, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Send(&y, 1, MPI_INT, rank_dst, 1, MPI_COMM_WORLD); MPI_Send(&y, 1, MPI_INT, rank_dst, 1, MPI_COMM_WORLD); if (x == 42) printf("success\n"); else printf("failure\n"); if (y == 42) printf("success\n"); else printf("failure\n"); } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); exit(0); }