/*@ MPI_Type_create_indexed_block - Creates an indexed datatype with constant sized blocks Input Parameters: + count - number of blocks -- also number of entries in indices and blocklens . blocklength - number of elements in each block (integer) . array_of_displacements - displacement of each block in multiples of old_type (array of integer) - old_type - old datatype (handle) Output Parameter: . newtype - new datatype (handle) .N fortran The indices are displacements, and are based on a zero origin. A common error is to do something like to following .vb integer a(100) integer blens(10), indices(10) do i=1,10 10 indices(i) = 1 + (i-1)*10 call MPI_TYPE_CREATE_INDEXED_BLOCK(10,1,indices,MPI_INTEGER,newtype,ierr) call MPI_TYPE_COMMIT(newtype,ierr) call MPI_SEND(a,1,newtype,...) .ve expecting this to send 'a(1),a(11),...' because the indices have values '1,11,...'. Because these are `displacements` from the beginning of 'a', it actually sends 'a(1+1),a(1+11),...'. If you wish to consider the displacements as indices into a Fortran array, consider declaring the Fortran array with a zero origin .vb integer a(0:99) .ve .N Errors .N MPI_ERR_COUNT .N MPI_ERR_TYPE .N MPI_ERR_ARG .N MPI_ERR_EXHAUSTED @*/ int MPI_Type_create_indexed_block( int count, int blocklength, int array_of_displacements[], MPI_Datatype old_type, MPI_Datatype *newtype ) { MPI_Aint *hindices; int *blocklens; int i, mpi_errno = MPI_SUCCESS; struct MPIR_DATATYPE *old_dtype_ptr; static char myname[] = "MPI_TYPE_CREATE_INDEXED_BLOCK"; MPIR_ERROR_DECL; TR_PUSH(myname); /* Check for bad arguments */ old_dtype_ptr = MPIR_GET_DTYPE_PTR(old_type); MPIR_TEST_DTYPE(old_type,old_dtype_ptr,MPIR_COMM_WORLD,myname); if ( ( (count < 0) && (mpi_errno = MPI_ERR_COUNT) ) || ( (old_dtype_ptr->dte_type == MPIR_UB) && (mpi_errno = MPI_ERR_TYPE) ) || ( (old_dtype_ptr->dte_type == MPIR_LB) && (mpi_errno = MPI_ERR_TYPE) ) ) return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno,myname); if (blocklength < 0) { mpi_errno = MPIR_Err_setmsg( MPI_ERR_ARG, MPIR_ERR_ARG_NAMED, myname, (char *)0, (char *)0, "blocklength", blocklength ); return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno, myname ); } /* Are we making a null datatype? */ if (blocklength == 0) { return MPI_Type_contiguous( 0, MPI_INT, newtype ); } /* Generate a call to MPI_Type_hindexed instead. This means allocating a temporary displacement array, multiplying all displacements by extent(old_type), and using that */ MPIR_ALLOC(hindices,(MPI_Aint *)MALLOC(count*sizeof(MPI_Aint)), MPIR_COMM_WORLD,MPI_ERR_EXHAUSTED,myname); MPIR_ALLOC(blocklens,(int *)MALLOC(count*sizeof(int)),MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,myname); for (i=0; i<count; i++) { hindices[i] = (MPI_Aint)array_of_displacements[i] * old_dtype_ptr->extent; blocklens[i] = blocklength; } MPIR_ERROR_PUSH(MPIR_COMM_WORLD); mpi_errno = MPI_Type_hindexed( count, blocklens, hindices, old_type, newtype ); MPIR_ERROR_POP(MPIR_COMM_WORLD); FREE(hindices); FREE(blocklens); TR_POP; MPIR_RETURN(MPIR_COMM_WORLD,mpi_errno, myname); }
static int intra_Reduce(void *sendbuf, void *recvbuf, int count, struct MPIR_DATATYPE *datatype, MPI_Op op, int root, struct MPIR_COMMUNICATOR *comm) { MPI_Status status; int size, rank; int mask, relrank, source, lroot; int mpi_errno = MPI_SUCCESS; MPI_User_function *uop; MPI_Aint lb, ub, m_extent; /* Extent in memory */ void *buffer; struct MPIR_OP *op_ptr; static char myname[] = "MPI_REDUCE"; MPIR_ERROR_DECL; mpi_comm_err_ret = 0; /* Is root within the communicator? */ MPIR_Comm_size(comm, &size); #ifndef MPIR_NO_ERROR_CHECKING if (root >= size) mpi_errno = MPIR_Err_setmsg(MPI_ERR_ROOT, MPIR_ERR_ROOT_TOOBIG, myname, (char *) 0, (char *) 0, root, size); if (root < 0) mpi_errno = MPIR_Err_setmsg(MPI_ERR_ROOT, MPIR_ERR_DEFAULT, myname, (char *) 0, (char *) 0, root); if (mpi_errno) return MPIR_ERROR(comm, mpi_errno, myname); #endif /* See the overview in Collection Operations for why this is ok */ if (count == 0) return MPI_SUCCESS; /* If the operation is predefined, we could check that the datatype's type signature is compatible with the operation. */ #ifdef MPID_Reduce /* Eventually, this could apply the MPID_Reduce routine in a loop for counts > 1 */ if (comm->ADIReduce && count == 1) { /* Call a routine to sort through the datatypes and operations ... This allows us to provide partial support (e.g., only SUM_DOUBLE) */ if (MPIR_ADIReduce(comm->ADIctx, comm, sendbuf, recvbuf, count, datatype->self, op, root) == MPI_SUCCESS) return MPI_SUCCESS; } #endif /* Get my rank and switch communicators to the hidden collective */ MPIR_Comm_rank(comm, &rank); comm = comm->comm_coll; op_ptr = MPIR_GET_OP_PTR(op); MPIR_TEST_MPI_OP(op, op_ptr, comm, myname); uop = op_ptr->op; /* Here's the algorithm. Relative to the root, look at the bit pattern in my rank. Starting from the right (lsb), if the bit is 1, send to the node with that bit zero and exit; if the bit is 0, receive from the node with that bit set and combine (as long as that node is within the group) Note that by receiving with source selection, we guarentee that we get the same bits with the same input. If we allowed the parent to receive the children in any order, then timing differences could cause different results (roundoff error, over/underflows in some cases, etc). Because of the way these are ordered, if root is 0, then this is correct for both commutative and non-commutitive operations. If root is not 0, then for non-commutitive, we use a root of zero and then send the result to the root. To see this, note that the ordering is mask = 1: (ab)(cd)(ef)(gh) (odds send to evens) mask = 2: ((ab)(cd))((ef)(gh)) (3,6 send to 0,4) mask = 4: (((ab)(cd))((ef)(gh))) (4 sends to 0) Comments on buffering. If the datatype is not contiguous, we still need to pass contiguous data to the user routine. In this case, we should make a copy of the data in some format, and send/operate on that. In general, we can't use MPI_PACK, because the alignment of that is rather vague, and the data may not be re-usable. What we actually need is a "squeeze" operation that removes the skips. */ /* Make a temporary buffer */ MPIR_Type_get_limits(datatype, &lb, &ub); m_extent = ub - lb; /* MPI_Type_extent ( datatype, &extent ); */ MPIR_ALLOC(buffer, (void *) MALLOC(m_extent * count), comm, MPI_ERR_EXHAUSTED, "MPI_REDUCE"); buffer = (void *) ((char *) buffer - lb); /* If I'm not the root, then my recvbuf may not be valid, therefore I have to allocate a temporary one */ if (rank != root) { MPIR_ALLOC(recvbuf, (void *) MALLOC(m_extent * count), comm, MPI_ERR_EXHAUSTED, "MPI_REDUCE"); recvbuf = (void *) ((char *) recvbuf - lb); } /* This code isn't correct if the source is a more complex datatype */ memcpy(recvbuf, sendbuf, m_extent * count); mask = 0x1; if (op_ptr->commute) lroot = root; else lroot = 0; relrank = (rank - lroot + size) % size; /* Lock for collective operation */ MPID_THREAD_LOCK(comm->ADIctx, comm); MPIR_Op_errno = MPI_SUCCESS; while ( /*(mask & relrank) == 0 && */ mask < size) { /* Receive */ if ((mask & relrank) == 0) { source = (relrank | mask); if (source < size) { source = (source + lroot) % size; mpi_errno = MPI_Recv(buffer, count, datatype->self, source, MPIR_REDUCE_TAG, comm->self, &status); if (mpi_errno) return MPIR_ERROR(comm, mpi_errno, myname); /* The sender is above us, so the received buffer must be the second argument (in the noncommutitive case). */ /* error pop/push allows errors found by predefined routines to be visible. We need a better way to do this */ /* MPIR_ERROR_POP(comm); */ if (op_ptr->commute) (*uop) (buffer, recvbuf, &count, &datatype->self); else { (*uop) (recvbuf, buffer, &count, &datatype->self); /* short term hack to keep recvbuf up-to-date */ memcpy(recvbuf, buffer, m_extent * count); } /* MPIR_ERROR_PUSH(comm); */ } } else { /* I've received all that I'm going to. Send my result to my parent */ source = ((relrank & (~mask)) + lroot) % size; mpi_errno = MPI_Send(recvbuf, count, datatype->self, source, MPIR_REDUCE_TAG, comm->self); if (mpi_errno) return MPIR_ERROR(comm, mpi_errno, myname); break; } mask <<= 1; } FREE((char *) buffer + lb); if (!op_ptr->commute && root != 0) { if (rank == 0) { mpi_errno = MPI_Send(recvbuf, count, datatype->self, root, MPIR_REDUCE_TAG, comm->self); } else if (rank == root) { mpi_errno = MPI_Recv(recvbuf, count, datatype->self, 0, /*size-1, */ MPIR_REDUCE_TAG, comm->self, &status); } } /* Free the temporarily allocated recvbuf */ if (rank != root) FREE((char *) recvbuf + lb); /* If the predefined operation detected an error, report it here */ /* Note that only the root gets this result, so this can cause programs to hang, particularly if this is used to implement MPI_Allreduce. Use care with this. */ if (mpi_errno == MPI_SUCCESS && MPIR_Op_errno) { /* PRINTF( "Error in performing MPI_Op in reduce\n" ); */ mpi_errno = MPIR_Op_errno; } /* Unlock for collective operation */ MPID_THREAD_UNLOCK(comm->ADIctx, comm); return (mpi_errno); }
static int intra_Alltoallv(void *sendbuf, int *sendcnts, int *sdispls, struct MPIR_DATATYPE *sendtype, void *recvbuf, int *recvcnts, int *rdispls, struct MPIR_DATATYPE *recvtype, struct MPIR_COMMUNICATOR *comm) { int size, i, j, rcnt; MPI_Aint send_extent, recv_extent; int mpi_errno = MPI_SUCCESS; MPI_Status *starray; MPI_Request *reqarray; /* Get size and switch to collective communicator */ MPIR_Comm_size(comm, &size); comm = comm->comm_coll; /* Get extent of send and recv types */ MPI_Type_extent(sendtype->self, &send_extent); MPI_Type_extent(recvtype->self, &recv_extent); /* Lock for collective operation */ MPID_THREAD_LOCK(comm->ADIctx, comm); /* 1st, get some storage from the heap to hold handles, etc. */ MPIR_ALLOC(starray, (MPI_Status *) MALLOC(2 * size * sizeof(MPI_Status)), comm, MPI_ERR_EXHAUSTED, "MPI_ALLTOALLV"); MPIR_ALLOC(reqarray, (MPI_Request *) MALLOC(2 * size * sizeof(MPI_Request)), comm, MPI_ERR_EXHAUSTED, "MPI_ALLTOALLV"); /* do the communication -- post *all* sends and receives: */ rcnt = 0; for (i = 0; i < size; i++) { reqarray[2 * i] = MPI_REQUEST_NULL; if ((mpi_errno = MPI_Irecv((void *) ((char *) recvbuf + rdispls[i] * recv_extent), recvcnts[i], recvtype->self, i, MPIR_ALLTOALLV_TAG, comm->self, &reqarray[2 * i + 1])) ) break; rcnt++; if ((mpi_errno = MPI_Isend((void *) ((char *) sendbuf + sdispls[i] * send_extent), sendcnts[i], sendtype->self, i, MPIR_ALLTOALLV_TAG, comm->self, &reqarray[2 * i])) ) break; rcnt++; } /* ... then wait for *all* of them to finish: */ if (mpi_errno) { /* We should really cancel all of the active requests */ for (j = 0; j < rcnt; j++) { MPI_Cancel(&reqarray[j]); } } else { mpi_errno = MPI_Waitall(2 * size, reqarray, starray); if (mpi_errno == MPI_ERR_IN_STATUS) { for (j = 0; j < 2 * size; j++) { if (starray[j].MPI_ERROR != MPI_SUCCESS) mpi_errno = starray[j].MPI_ERROR; } } } /* clean up */ FREE(reqarray); FREE(starray); /* Unlock for collective operation */ MPID_THREAD_UNLOCK(comm->ADIctx, comm); return (mpi_errno); }
static int intra_Alltoall(void *sendbuf, int sendcount, struct MPIR_DATATYPE *sendtype, void *recvbuf, int recvcnt, struct MPIR_DATATYPE *recvtype, struct MPIR_COMMUNICATOR *comm) { int size, i, j; int me; MPI_Aint send_extent, recv_extent; int mpi_errno = MPI_SUCCESS; MPI_Status *starray; MPI_Request *reqarray; static char myname[] = "MPI_ALLTOALL"; /* Get size and switch to collective communicator */ MPIR_Comm_size(comm, &size); MPIR_Comm_rank(comm, &me); comm = comm->comm_coll; /* Get extent of send and recv types */ MPI_Type_extent(sendtype->self, &send_extent); MPI_Type_extent(recvtype->self, &recv_extent); /* Lock for collective operation */ MPID_THREAD_LOCK(comm->ADIctx, comm); /* */ /* 1st, get some storage from the heap to hold handles, etc. */ MPIR_ALLOC(starray, (MPI_Status *) MALLOC(2 * size * sizeof(MPI_Status)), comm, MPI_ERR_EXHAUSTED, myname); MPIR_ALLOC(reqarray, (MPI_Request *) MALLOC(2 * size * sizeof(MPI_Request)), comm, MPI_ERR_EXHAUSTED, myname); /* do the communication -- post *all* sends and receives: */ /* ServerNet Optimization. Post all receives then synchronously cycle through all of the sends, */ for (i = 0; i < size; i++) { /* We'd like to avoid sending and receiving to ourselves; however, this is complicated by the presence of different sendtype and recvtypes. */ if ((mpi_errno = MPI_Irecv((void *) ((char *) recvbuf + (((i + me) % size) * recvcnt * recv_extent)), recvcnt, recvtype->self, ((i + me) % size), MPIR_ALLTOALL_TAG, comm->self, &reqarray[i])) ) break; } for (i = 0; i < size; i++) { MPI_Barrier(comm->self); if ((mpi_errno = MPI_Send((void *) ((char *) sendbuf + ((i + me) % size) * sendcount * send_extent), sendcount, sendtype->self, ((i + me) % size), MPIR_ALLTOALL_TAG, comm->self)) ) break; } if (mpi_errno) return mpi_errno; /* ... then wait for *all* of them to finish: */ mpi_errno = MPI_Waitall(size, reqarray, starray); if (mpi_errno == MPI_ERR_IN_STATUS) { for (j = 0; j < size; j++) { if (starray[j].MPI_ERROR != MPI_SUCCESS) mpi_errno = starray[j].MPI_ERROR; } } /* clean up */ FREE(starray); FREE(reqarray); /* Unlock for collective operation */ MPID_THREAD_UNLOCK(comm->ADIctx, comm); return (mpi_errno); }
static int intra_Scan(void *sendbuf, void *recvbuf, int count, struct MPIR_DATATYPE *datatype, MPI_Op op, struct MPIR_COMMUNICATOR *comm) { MPI_Status status; int rank, size; int mpi_errno = MPI_SUCCESS; MPI_Aint lb, ub, m_extent; /* Extent in memory */ MPI_User_function *uop; struct MPIR_OP *op_ptr; MPIR_ERROR_DECL; mpi_comm_err_ret = 0; /* See the overview in Collection Operations for why this is ok */ if (count == 0) return MPI_SUCCESS; /* Get my rank & size and switch communicators to the hidden collective */ MPIR_Comm_size(comm, &size); MPIR_Comm_rank(comm, &rank); MPIR_Type_get_limits(datatype, &lb, &ub); m_extent = ub - lb; comm = comm->comm_coll; op_ptr = MPIR_GET_OP_PTR(op); MPIR_TEST_MPI_OP(op, op_ptr, comm, "MPI_SCAN"); uop = op_ptr->op; /* Lock for collective operation */ MPID_THREAD_LOCK(comm->ADIctx, comm); /* This is an O(size) algorithm. A modification of the algorithm in reduce.c can be used to make this O(log(size)) */ /* commutative case requires no extra buffering */ MPIR_Op_errno = MPI_SUCCESS; if (op_ptr->commute) { /* Do the scan operation */ if (rank > 0) { mpi_errno = MPI_Recv(recvbuf, count, datatype->self, rank - 1, MPIR_SCAN_TAG, comm->self, &status); if (mpi_errno) return mpi_errno; /* See reduce for why pop/push */ MPIR_ERROR_POP(comm); (*uop) (sendbuf, recvbuf, &count, &datatype->self); MPIR_ERROR_PUSH(comm); } else { MPIR_COPYSELF(sendbuf, count, datatype->self, recvbuf, MPIR_SCAN_TAG, rank, comm->self); if (mpi_errno) return mpi_errno; } } /* non-commutative case requires extra buffering */ else { /* Do the scan operation */ if (rank > 0) { void *tmpbuf; MPIR_ALLOC(tmpbuf, (void *) MALLOC(m_extent * count), comm, MPI_ERR_EXHAUSTED, "MPI_SCAN"); tmpbuf = (void *) ((char *) tmpbuf - lb); MPIR_COPYSELF(sendbuf, count, datatype->self, recvbuf, MPIR_SCAN_TAG, rank, comm->self); if (mpi_errno) return mpi_errno; mpi_errno = MPI_Recv(tmpbuf, count, datatype->self, rank - 1, MPIR_SCAN_TAG, comm->self, &status); if (mpi_errno) return mpi_errno; (*uop) (tmpbuf, recvbuf, &count, &datatype->self); FREE((char *) tmpbuf + lb); } else { MPIR_COPYSELF(sendbuf, count, datatype->self, recvbuf, MPIR_SCAN_TAG, rank, comm->self); if (mpi_errno) return mpi_errno; } } /* send the letter to destination */ if (rank < (size - 1)) mpi_errno = MPI_Send(recvbuf, count, datatype->self, rank + 1, MPIR_SCAN_TAG, comm->self); /* If the predefined operation detected an error, report it here */ if (mpi_errno == MPI_SUCCESS && MPIR_Op_errno) mpi_errno = MPIR_Op_errno; /* Unlock for collective operation */ MPID_THREAD_UNLOCK(comm->ADIctx, comm); return (mpi_errno); }
static int intra_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts, struct MPIR_DATATYPE *datatype, MPI_Op op, struct MPIR_COMMUNICATOR *comm) { int rank, size, i, count = 0; MPI_Aint lb, ub, m_extent; /* Extent in memory */ int *displs; void *buffer; int mpi_errno = MPI_SUCCESS, rc; static char myname[] = "MPI_REDUCE_SCATTER"; /* Determine the "count" of items to reduce and set the displacements */ MPIR_Type_get_limits(datatype, &lb, &ub); m_extent = ub - lb; /* MPI_Type_extent (datatype, &extent); */ MPIR_Comm_size(comm, &size); MPIR_Comm_rank(comm, &rank); /* Allocate the displacements and initialize them */ MPIR_ALLOC(displs, (int *) MALLOC(size * sizeof(int)), comm, MPI_ERR_EXHAUSTED, myname); for (i = 0; i < size; i++) { displs[i] = count; count += recvcnts[i]; if (recvcnts[i] < 0) { FREE(displs); mpi_errno = MPIR_Err_setmsg(MPI_ERR_COUNT, MPIR_ERR_COUNT_ARRAY_NEG, myname, (char *) 0, (char *) 0, i, recvcnts[i]); return mpi_errno; } } /* Allocate a temporary buffer */ if (count == 0) { FREE(displs); return MPI_SUCCESS; } MPIR_ALLOC(buffer, (void *) MALLOC(m_extent * count), comm, MPI_ERR_EXHAUSTED, myname); buffer = (void *) ((char *) buffer - lb); /* Reduce to 0, then scatter */ mpi_errno = MPI_Reduce(sendbuf, buffer, count, datatype->self, op, 0, comm->self); if (mpi_errno == MPI_SUCCESS || mpi_errno == MPIR_ERR_OP_NOT_DEFINED) { rc = MPI_Scatterv(buffer, recvcnts, displs, datatype->self, recvbuf, recvcnts[rank], datatype->self, 0, comm->self); if (rc) mpi_errno = rc; } /* Free the temporary buffers */ FREE((char *) buffer + lb); FREE(displs); return (mpi_errno); }
int MPIR_intra_Scan ( void *sendbuf, void *recvbuf, int count, struct MPIR_DATATYPE *datatype, MPI_Op op, struct MPIR_COMMUNICATOR *comm ) { MPI_Status status; int rank, size; int mpi_errno = MPI_SUCCESS; MPI_Aint lb, ub, m_extent; /* Extent in memory */ MPI_User_function *uop; struct MPIR_OP *op_ptr; MPIR_ERROR_DECL; int dd; /* displacement, no of hops to send (power of 2) */ int rr; /* "round rank" */ void *tmpbuf; mpi_comm_err_ret = 0; /* Nov. 98: Improved O(log(size)) algorithm */ /* See the overview in Collection Operations for why this is ok */ if (count == 0) return MPI_SUCCESS; /* Get my rank & size and switch communicators to the hidden collective */ MPIR_Comm_size ( comm, &size ); MPIR_Comm_rank ( comm, &rank ); MPIR_Type_get_limits( datatype, &lb, &ub ); m_extent = ub - lb; comm = comm->comm_coll; op_ptr = MPIR_GET_OP_PTR(op); MPIR_TEST_MPI_OP(op,op_ptr,comm,"MPI_SCAN"); uop = op_ptr->op; /* Lock for collective operation */ MPID_THREAD_DS_LOCK(comm); MPIR_Op_errno = MPI_SUCCESS; if (rank>0) { /* allocate temporary receive buffer (needed both in commutative and noncommutative case) */ MPIR_ALLOC(tmpbuf,(void *)MALLOC(m_extent * count), comm, MPI_ERR_EXHAUSTED, "Out of space in MPI_SCAN" ); tmpbuf = (void *)((char*)tmpbuf-lb); } MPIR_COPYSELF( sendbuf, count, datatype->self, recvbuf, MPIR_SCAN_TAG, rank, comm->self ); /* compute partial scans */ rr = rank; dd = 1; while ((rr&1)==1) { /* odd "round rank"s receive */ mpi_errno = MPI_Recv(tmpbuf,count,datatype->self,rank-dd, MPIR_SCAN_TAG,comm->self,&status); if (mpi_errno) return mpi_errno; #ifdef WIN32 if(op_ptr->stdcall) op_ptr->op_s(tmpbuf, recvbuf, &count, &datatype->self); else #endif (*uop)(tmpbuf, recvbuf, &count, &datatype->self); dd <<= 1; /* dd*2 */ rr >>= 1; /* rr/2 */ /* Invariant: recvbuf contains the scan of (rank-dd)+1, (rank-dd)+2,..., rank */ } /* rr even, rank==rr*dd+dd-1, recvbuf contains the scan of rr*dd, rr*dd+1,..., rank */ /* send partial scan forwards */ if (rank+dd<size) { mpi_errno = MPI_Send(recvbuf,count,datatype->self,rank+dd,MPIR_SCAN_TAG, comm->self); if (mpi_errno) return mpi_errno; } if (rank-dd>=0) { mpi_errno = MPI_Recv(tmpbuf,count,datatype->self,rank-dd, MPIR_SCAN_TAG,comm->self,&status); if (mpi_errno) return mpi_errno; #ifdef WIN32 if(op_ptr->stdcall) op_ptr->op_s(tmpbuf, recvbuf, &count, &datatype->self); else #endif (*uop)(tmpbuf, recvbuf, &count, &datatype->self); /* recvbuf contains the scan of 0,..., rank */ } /* send result forwards */ do { dd >>= 1; /* dd/2 */ } while (rank+dd>=size); while (dd>0) { mpi_errno = MPI_Send(recvbuf,count,datatype->self,rank+dd,MPIR_SCAN_TAG, comm->self); if (mpi_errno) return mpi_errno; dd >>= 1; /* dd/2 */ } if (rank>0) { /* free temporary receive buffer */ FREE((char*)tmpbuf+lb); } /* If the predefined operation detected an error, report it here */ if (mpi_errno == MPI_SUCCESS && MPIR_Op_errno) mpi_errno = MPIR_Op_errno; /* Unlock for collective operation */ MPID_THREAD_DS_UNLOCK(comm); return(mpi_errno); }
int MPIR_intra_Scan ( void *sendbuf, void *recvbuf, int count, struct MPIR_DATATYPE *datatype, MPI_Op op, struct MPIR_COMMUNICATOR *comm ) { MPI_Status status; int rank, size; int mpi_errno = MPI_SUCCESS; MPI_User_function *uop; struct MPIR_OP *op_ptr; int mask, dst; MPI_Aint extent, lb; void *partial_scan, *tmp_buf; static char myname[] = "MPI_SCAN"; if (count == 0) return MPI_SUCCESS; MPIR_Comm_size(comm, &size); MPIR_Comm_rank(comm, &rank); /* Switch communicators to the hidden collective */ comm = comm->comm_coll; /* Lock for collective operation */ MPID_THREAD_LOCK(comm->ADIctx,comm); op_ptr = MPIR_GET_OP_PTR(op); MPIR_TEST_MPI_OP(op,op_ptr,comm,myname); uop = op_ptr->op; /* need to allocate temporary buffer to store partial scan*/ MPI_Type_extent(datatype->self, &extent); MPIR_ALLOC(partial_scan,(void *)MALLOC(count*extent), comm, MPI_ERR_EXHAUSTED, myname); /* adjust for potential negative lower bound in datatype */ MPI_Type_lb( datatype->self, &lb ); partial_scan = (void *)((char*)partial_scan - lb); /* need to allocate temporary buffer to store incoming data*/ MPIR_ALLOC(tmp_buf,(void *)MALLOC(count*extent), comm, MPI_ERR_EXHAUSTED, myname); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *)((char*)tmp_buf - lb); /* Since this is an inclusive scan, copy local contribution into recvbuf. */ mpi_errno = MPI_Sendrecv ( sendbuf, count, datatype->self, rank, MPIR_SCAN_TAG, recvbuf, count, datatype->self, rank, MPIR_SCAN_TAG, comm->self, &status ); if (mpi_errno) return mpi_errno; mpi_errno = MPI_Sendrecv ( sendbuf, count, datatype->self, rank, MPIR_SCAN_TAG, partial_scan, count, datatype->self, rank, MPIR_SCAN_TAG, comm->self, &status ); if (mpi_errno) return mpi_errno; mask = 0x1; while (mask < size) { dst = rank ^ mask; if (dst < size) { /* Send partial_scan to dst. Recv into tmp_buf */ mpi_errno = MPI_Sendrecv(partial_scan, count, datatype->self, dst, MPIR_SCAN_TAG, tmp_buf, count, datatype->self, dst, MPIR_SCAN_TAG, comm->self, &status); if (mpi_errno) return mpi_errno; if (rank > dst) { (*uop)(tmp_buf, partial_scan, &count, &datatype->self); (*uop)(tmp_buf, recvbuf, &count, &datatype->self); } else { if (op_ptr->commute) (*uop)(tmp_buf, partial_scan, &count, &datatype->self); else { (*uop)(partial_scan, tmp_buf, &count, &datatype->self); mpi_errno = MPI_Sendrecv(tmp_buf, count, datatype->self, rank, MPIR_SCAN_TAG, partial_scan, count, datatype->self, rank, MPIR_SCAN_TAG, comm->self, &status); if (mpi_errno) return mpi_errno; } } } mask <<= 1; } FREE((char *)partial_scan+lb); FREE((char *)tmp_buf+lb); /* Unlock for collective operation */ MPID_THREAD_UNLOCK(comm->ADIctx,comm); return (mpi_errno); }
/*@ MPI_Type_struct - Creates a struct datatype Input Parameters: + count - number of blocks (integer) -- also number of entries in arrays array_of_types , array_of_displacements and array_of_blocklengths . blocklens - number of elements in each block (array) . indices - byte displacement of each block (array) - old_types - type of elements in each block (array of handles to datatype objects) Output Parameter: . newtype - new datatype (handle) Notes: If an upperbound is set explicitly by using the MPI datatype 'MPI_UB', the corresponding index must be positive. The MPI standard originally made vague statements about padding and alignment; this was intended to allow the simple definition of structures that could be sent with a count greater than one. For example, .vb struct { int a; char b; } foo; .ve may have 'sizeof(foo) > sizeof(int) + sizeof(char)'; for example, 'sizeof(foo) == 2*sizeof(int)'. The initial version of the MPI standard defined the extent of a datatype as including an `epsilon` that would have allowed an implementation to make the extent an MPI datatype for this structure equal to '2*sizeof(int)'. However, since different systems might define different paddings, there was much discussion by the MPI Forum about what was the correct value of epsilon, and one suggestion was to define epsilon as zero. This would have been the best thing to do in MPI 1.0, particularly since the 'MPI_UB' type allows the user to easily set the end of the structure. Unfortunately, this change did not make it into the final document. Currently, this routine does not add any padding, since the amount of padding needed is determined by the compiler that the user is using to build their code, not the compiler used to construct the MPI library. A later version of MPICH may provide for some natural choices of padding (e.g., multiple of the size of the largest basic member), but users are advised to never depend on this, even with vendor MPI implementations. Instead, if you define a structure datatype and wish to send or receive multiple items, you should explicitly include an 'MPI_UB' entry as the last member of the structure. For example, the following code can be used for the structure foo .vb blen[0] = 1; indices[0] = 0; oldtypes[0] = MPI_INT; blen[1] = 1; indices[1] = &foo.b - &foo; oldtypes[1] = MPI_CHAR; blen[2] = 1; indices[2] = sizeof(foo); oldtypes[2] = MPI_UB; MPI_Type_struct( 3, blen, indices, oldtypes, &newtype ); .ve .N fortran .N Errors .N MPI_SUCCESS .N MPI_ERR_TYPE .N MPI_ERR_COUNT .N MPI_ERR_EXHAUSTED @*/ int MPI_Type_struct( int count, int blocklens[], MPI_Aint indices[], MPI_Datatype old_types[], MPI_Datatype *newtype ) { struct MPIR_DATATYPE* dteptr; MPI_Aint ub, lb, high, low, real_ub, real_lb, real_init; int high_init = 0, low_init = 0; int i, mpi_errno = MPI_SUCCESS; MPI_Aint ub_marker = 0, lb_marker = 0; /* to suppress warnings */ MPI_Aint ub_found = 0, lb_found = 0; int size, total_count; static char myname[] = "MPI_TYPE_STRUCT"; disableSignal(); /* Check for bad arguments */ if ( count < 0 ) { mpi_errno = MPIR_Err_setmsg( MPI_ERR_COUNT, MPIR_ERR_DEFAULT, myname, (char *)0, (char *)0, count ); revertSignal(); return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno, myname ); } if (count == 0) { revertSignal(); return MPI_Type_contiguous( 0, MPI_INT, newtype ); } /* Check blocklens and old_types arrays and find number of bound */ /* markers */ total_count = 0; for (i=0; i<count; i++) { total_count += blocklens[i]; if ( blocklens[i] < 0) { mpi_errno = MPIR_Err_setmsg( MPI_ERR_ARG, MPIR_ERR_ARG_ARRAY_VAL, myname, (char *)0, (char *)0, "blocklens", i, blocklens[i] ); revertSignal(); return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno,myname); } if ( old_types[i] == MPI_DATATYPE_NULL ) { mpi_errno = MPIR_Err_setmsg( MPI_ERR_TYPE, MPIR_ERR_TYPE_ARRAY_NULL, myname, (char *)0, (char *)0, "old_types", i ); revertSignal(); return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno, myname ); } } if (total_count == 0) { revertSignal(); return MPI_Type_contiguous( 0, MPI_INT, newtype ); } /* Create and fill in the datatype */ MPIR_ALLOC(dteptr,(struct MPIR_DATATYPE *) MPIR_SBalloc( MPIR_dtes ),MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname ); *newtype = (MPI_Datatype) MPIR_FromPointer( dteptr ); dteptr->self = *newtype; MPIR_SET_COOKIE(dteptr,MPIR_DATATYPE_COOKIE) dteptr->dte_type = MPIR_STRUCT; dteptr->committed = 0; dteptr->basic = 0; dteptr->permanent = 0; dteptr->is_contig = 0; dteptr->ref_count = 1; dteptr->count = count; dteptr->elements = 0; dteptr->size = 0; dteptr->align = 1; dteptr->has_ub = 0; dteptr->has_lb = 0; dteptr->self = *newtype; /* Create indices and blocklens arrays and fill them */ dteptr->indices = ( MPI_Aint * ) MALLOC( count * sizeof( MPI_Aint ) ); dteptr->blocklens = ( int * ) MALLOC( count * sizeof( int ) ); dteptr->old_types = ( struct MPIR_DATATYPE ** )MALLOC(count*sizeof(struct MPIR_DATATYPE *)); if (!dteptr->indices || !dteptr->blocklens || !dteptr->old_types) { revertSignal(); return MPIR_ERROR( MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, "MPI_TYPE_STRUCT" ); } high = low = ub = lb = 0; real_ub = real_lb = 0; real_init = 0; /* If data alignment is 2, 4, or 8, then assign dteptr->align to that value. If 0, then assign dteptr->align to the maximal alignment requirement. (done below) */ if (ALIGNMENT_VALUE > 0) dteptr->align = ALIGNMENT_VALUE; for (i = 0; i < count; i++) { struct MPIR_DATATYPE *old_dtype_ptr; old_dtype_ptr = MPIR_GET_DTYPE_PTR(old_types[i]); MPIR_TEST_DTYPE(old_types[i],old_dtype_ptr,MPIR_COMM_WORLD, "MPI_TYPE_STRUCT"); dteptr->old_types[i] = MPIR_Type_dup (old_dtype_ptr); dteptr->indices[i] = indices[i]; dteptr->blocklens[i] = blocklens[i]; /* Keep track of maximal alignment requirement */ if (ALIGNMENT_VALUE == 0) { if (dteptr->align < old_dtype_ptr->align) dteptr->align = old_dtype_ptr->align; } if ( old_dtype_ptr->dte_type == MPIR_UB ) { if (ub_found) { if (indices[i] > ub_marker) ub_marker = indices[i]; } else { ub_marker = indices[i]; ub_found = 1; } } else if ( old_dtype_ptr->dte_type == MPIR_LB ) { if (lb_found) { if ( indices[i] < lb_marker ) { lb_marker = indices[i]; } } else { lb_marker = indices[i]; lb_found = 1; } } else { /* Since the datatype is NOT a UB or LB, save the real limits */ if (!real_init) { real_init = 1; real_lb = old_dtype_ptr->real_lb; real_ub = old_dtype_ptr->real_ub; } else { if (old_dtype_ptr->real_lb < real_lb) real_lb = old_dtype_ptr->real_lb; if (old_dtype_ptr->real_ub > real_ub) real_ub = old_dtype_ptr->real_ub; } /* Next, check to see if datatype has an MPI_LB or MPI_UB within it... Make sure to adjust the ub by the selected displacement and blocklens (blocklens is like Type_contiguous) */ if (old_dtype_ptr->has_ub) { MPI_Aint ub_test; ub_test = old_dtype_ptr->ub + indices[i] + (blocklens[i] - 1) * old_dtype_ptr->extent; if (ub_marker < ub_test || !ub_found) ub_marker = ub_test; ub_found = 1; } if (old_dtype_ptr->has_lb) { if (!lb_found || lb_marker > (old_dtype_ptr->lb) + indices[i] ) lb_marker = old_dtype_ptr->lb + indices[i]; lb_found = 1; } /* Get the ub/lb from the datatype (if a MPI_UB or MPI_LB was found, then these values will be ignored). We use the lb of the old type and add the indices value to it */ lb = indices[i] + old_dtype_ptr->lb; ub = lb + (blocklens[i] * old_dtype_ptr->extent) ; if (!high_init) { high = ub; high_init = 1; } else if (ub > high) high = ub; if (!low_init ) { low = lb; low_init = 1; } else if (lb < low) low = lb; if (ub > lb) { if ( high < ub ) high = ub; if ( low > lb ) low = lb; } else { if ( high < lb ) high = lb; if ( low > ub ) low = ub; } dteptr->elements += (blocklens[i] * old_dtype_ptr->elements); } /* end else */ if (i < count - 1) { size = old_dtype_ptr->size * blocklens[i]; dteptr->size += size; } else { dteptr->size += (blocklens[i] * old_dtype_ptr->size); } } /* end for loop */ /* Set the upper/lower bounds and the extent and size */ if (lb_found) { dteptr->lb = lb_marker; dteptr->has_lb = 1; } else dteptr->lb = (low_init ? low : 0); if (ub_found) { dteptr->ub = ub_marker; dteptr->has_ub = 1; } else dteptr->ub = (high_init ? high: 0); dteptr->extent = dteptr->ub - dteptr->lb ; dteptr->real_ub = real_ub; dteptr->real_lb = real_lb; /* If there is no explicit ub/lb marker, make the extent/ub fit the alignment of the largest basic item, if that structure alignment is chosen */ if (!lb_found && !ub_found) { MPI_Aint eps_offset; /* Since data is always offset by the extent, is the extent that we must adjust. */ eps_offset = dteptr->extent % dteptr->align; if (eps_offset > 0) { dteptr->ub += (dteptr->align - eps_offset); dteptr->extent = dteptr->ub - dteptr->lb; } } # if defined(MPID_HAS_TYPE_STRUCT) { mpi_errno = MPID_Type_struct(count, blocklens, indices, old_types, *newtype); } # endif revertSignal(); return (mpi_errno); }
/*@ MPI_Cart_create - Makes a new communicator to which topology information has been attached Input Parameters: + comm_old - input communicator (handle) . ndims - number of dimensions of cartesian grid (integer) . dims - integer array of size ndims specifying the number of processes in each dimension . periods - logical array of size ndims specifying whether the grid is periodic (true) or not (false) in each dimension - reorder - ranking may be reordered (true) or not (false) (logical) Output Parameter: . comm_cart - communicator with new cartesian topology (handle) Algorithm: We ignore 'reorder' info currently. .N fortran .N Errors .N MPI_SUCCESS .N MPI_ERR_TOPOLOGY .N MPI_ERR_DIMS .N MPI_ERR_ARG @*/ int MPI_Cart_create ( MPI_Comm comm_old, int ndims, int *dims, int *periods, int reorder, MPI_Comm *comm_cart ) { int range[1][3]; MPI_Group group_old, group; int i, rank, num_ranks = 1; int mpi_errno = MPI_SUCCESS; int flag, size; MPIR_TOPOLOGY *topo; struct MPIR_COMMUNICATOR *comm_old_ptr; static char myname[] = "MPI_CART_CREATE"; TR_PUSH(myname); comm_old_ptr = MPIR_GET_COMM_PTR(comm_old); /* Check validity of arguments */ #ifndef MPIR_NO_ERROR_CHECKING MPIR_TEST_MPI_COMM(comm_old,comm_old_ptr,comm_old_ptr,myname); MPIR_TEST_ARG(comm_cart); MPIR_TEST_ARG(periods); if (ndims < 1 || dims == (int *)0) mpi_errno = MPI_ERR_DIMS; if (mpi_errno) return MPIR_ERROR(comm_old_ptr, mpi_errno, myname ); /* Check for Intra-communicator */ MPI_Comm_test_inter ( comm_old, &flag ); if (flag) return MPIR_ERROR(comm_old_ptr, MPIR_ERRCLASS_TO_CODE(MPI_ERR_COMM,MPIR_ERR_COMM_INTER), myname ); #endif /* Determine number of ranks in topology */ for ( i=0; i<ndims; i++ ) num_ranks *= (dims[i]>0)?dims[i]:-dims[i]; if ( num_ranks < 1 ) { (*comm_cart) = MPI_COMM_NULL; return MPIR_ERROR( comm_old_ptr, MPI_ERR_TOPOLOGY, myname ); } /* Is the old communicator big enough? */ MPIR_Comm_size (comm_old_ptr, &size); if (num_ranks > size) { mpi_errno = MPIR_Err_setmsg( MPI_ERR_TOPOLOGY, MPIR_ERR_TOPO_TOO_LARGE, myname, "Topology size is larger than size of communicator", "Topology size %d is greater than communicator size %d", num_ranks, size ); return MPIR_ERROR(comm_old_ptr, mpi_errno, myname ); } /* Make new comm */ range[0][0] = 0; range[0][1] = num_ranks - 1; range[0][2] = 1; MPI_Comm_group ( comm_old, &group_old ); MPI_Group_range_incl ( group_old, 1, range, &group ); MPI_Comm_create ( comm_old, group, comm_cart ); MPI_Group_free( &group ); MPI_Group_free( &group_old ); /* Store topology information in new communicator */ if ( (*comm_cart) != MPI_COMM_NULL ) { MPIR_ALLOC(topo,(MPIR_TOPOLOGY *) MPIR_SBalloc ( MPIR_topo_els ), comm_old_ptr,MPI_ERR_EXHAUSTED,myname); MPIR_SET_COOKIE(&topo->cart,MPIR_CART_TOPOL_COOKIE) topo->cart.type = MPI_CART; topo->cart.nnodes = num_ranks; topo->cart.ndims = ndims; MPIR_ALLOC(topo->cart.dims,(int *)MALLOC( sizeof(int) * 3 * ndims ), comm_old_ptr,MPI_ERR_EXHAUSTED,myname); topo->cart.periods = topo->cart.dims + ndims; topo->cart.position = topo->cart.periods + ndims; for ( i=0; i<ndims; i++ ) { topo->cart.dims[i] = dims[i]; topo->cart.periods[i] = periods[i]; } /* Compute my position */ MPI_Comm_rank ( (*comm_cart), &rank ); for ( i=0; i < ndims; i++ ) { num_ranks = num_ranks / dims[i]; topo->cart.position[i] = rank / num_ranks; rank = rank % num_ranks; } /* cache topology information */ MPI_Attr_put ( (*comm_cart), MPIR_TOPOLOGY_KEYVAL, (void *)topo ); } TR_POP; return (mpi_errno); }
/*@ MPI_Group_intersection - Produces a group as the intersection of two existing groups Input Parameters: + group1 - first group (handle) - group2 - second group (handle) Output Parameter: . newgroup - intersection group (handle) .N fortran .N Errors .N MPI_SUCCESS .N MPI_ERR_GROUP .N MPI_ERR_EXHAUSTED .seealso: MPI_Group_free @*/ EXPORT_MPI_API int MPI_Group_intersection ( MPI_Group group1, MPI_Group group2, MPI_Group *group_out ) { int i, j, global_rank; struct MPIR_GROUP *group1_ptr, *group2_ptr, *new_group_ptr; int n; int mpi_errno = MPI_SUCCESS; static char myname[] = "MPI_GROUP_INTERSECTION"; TR_PUSH(myname); group1_ptr = MPIR_GET_GROUP_PTR(group1); group2_ptr = MPIR_GET_GROUP_PTR(group2); #ifndef MPIR_NO_ERROR_CHECKING /* MPIR_TEST_MPI_GROUP(group1,group1_ptr,MPIR_COMM_WORLD,myname); */ /* MPIR_TEST_MPI_GROUP(group2,group2_ptr,MPIR_COMM_WORLD,myname); */ MPIR_TEST_GROUP(group1_ptr); MPIR_TEST_GROUP(group2_ptr); if (mpi_errno) return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno, myname ); #endif /* Check for EMPTY groups */ if ( (group1 == MPI_GROUP_EMPTY) || (group2 == MPI_GROUP_EMPTY) ) { MPIR_Group_dup ( MPIR_GROUP_EMPTY, &new_group_ptr ); *group_out = new_group_ptr->self; TR_POP; return (mpi_errno); } /* Set the number in the intersection */ n = 0; /* Allocate set marking space for group1 if necessary */ if (group1_ptr->set_mark == NULL) { MPIR_ALLOC(group1_ptr->set_mark,(int *) MALLOC( group1_ptr->np * sizeof(int) ), MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname ); } /* Mark the intersection */ for ( i=0; i<group1_ptr->np; i++ ) { group1_ptr->set_mark[i] = MPIR_UNMARKED; for ( j=0; j<group2_ptr->np; j++ ) if ( group1_ptr->lrank_to_grank[i] == group2_ptr->lrank_to_grank[j] ) { group1_ptr->set_mark[i] = MPIR_MARKED; n++; break; } } /* If there is a null intersection */ if ( n <= 0 ) { MPIR_Group_dup ( MPIR_GROUP_EMPTY, &new_group_ptr ); *group_out = new_group_ptr->self; TR_POP; return (mpi_errno); } /* Create the new group */ MPIR_ALLOC(new_group_ptr,NEW(struct MPIR_GROUP),MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname ); *group_out = (MPI_Group) MPIR_FromPointer( new_group_ptr ); new_group_ptr->self = *group_out; MPIR_SET_COOKIE(new_group_ptr,MPIR_GROUP_COOKIE) new_group_ptr->ref_count = 1; new_group_ptr->permanent = 0; new_group_ptr->local_rank = MPI_UNDEFINED; new_group_ptr->set_mark = (int *)0; /* Alloc memory for lrank_to_grank array */ new_group_ptr->np = n; new_group_ptr->lrank_to_grank = (int *) MALLOC( n * sizeof(int) ); if (!new_group_ptr->lrank_to_grank) { return MPIR_ERROR( MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname ); } /* Fill in the space */ for ( n=0, i=0; i<group1_ptr->np; i++ ) if ( (group1_ptr->set_mark[i]==MPIR_MARKED) && (n < new_group_ptr->np) ) new_group_ptr->lrank_to_grank[n++] = group1_ptr->lrank_to_grank[i]; /* Find the local rank */ global_rank = MPID_MyWorldRank; for( i=0; i<new_group_ptr->np; i++ ) if ( global_rank == new_group_ptr->lrank_to_grank[i] ) { new_group_ptr->local_rank = i; break; } /* Determine the previous and next powers of 2 */ MPIR_Powers_of_2 ( new_group_ptr->np, &(new_group_ptr->N2_next), &(new_group_ptr->N2_prev) ); TR_POP; return (mpi_errno); }
/*@ MPI_Group_range_excl - Produces a group by excluding ranges of processes from an existing group Input Parameters: + group - group (handle) . n - number of elements in array 'ranks' (integer) - ranges - a one-dimensional array of integer triplets of the form (first rank, last rank, stride), indicating the ranks in 'group' of processes to be excluded from the output group 'newgroup' . Output Parameter: . newgroup - new group derived from above, preserving the order in 'group' (handle) Note: Currently, each of the ranks to exclude must be a valid rank in the group and all elements must be distinct or the function is erroneous. This restriction is per the draft. .N fortran .N Errors .N MPI_SUCCESS .N MPI_ERR_GROUP .N MPI_ERR_EXHAUSTED .N MPI_ERR_RANK .N MPI_ERR_ARG .seealso: MPI_Group_free @*/ EXPORT_MPI_API int MPI_Group_range_excl ( MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup ) { int i, j, first, last, stride; int np; struct MPIR_GROUP *group_ptr, *new_group_ptr; int mpi_errno = MPI_SUCCESS; static char myname[] = "MPI_GROUP_RANGE_EXCL"; TR_PUSH(myname); /* Check for bad arguments */ group_ptr = MPIR_GET_GROUP_PTR(group); #ifndef MPIR_NO_ERROR_CHECKING /* MPIR_TEST_MPI_GROUP(group,group_ptr,MPIR_COMM_WORLD,myname); */ MPIR_TEST_GROUP(group_ptr); if (mpi_errno) return MPIR_ERROR(MPIR_COMM_WORLD, mpi_errno, myname ); #endif /* Check for a EMPTY input group */ if ( (group == MPI_GROUP_EMPTY) ) { MPIR_Group_dup ( MPIR_GROUP_EMPTY, &new_group_ptr ); *newgroup = new_group_ptr->self; TR_POP; return (mpi_errno); } /* Check for no range ranks to exclude */ if ( n == 0 ) { MPIR_Group_dup ( group_ptr, &new_group_ptr ); *newgroup = new_group_ptr->self; return (mpi_errno); } if (n < 0) return MPIR_ERROR( MPIR_COMM_WORLD, MPI_ERR_ARG, myname ); /* Allocate set marking space for group if necessary */ if (group_ptr->set_mark == NULL) { MPIR_ALLOC(group_ptr->set_mark,(int *) MALLOC( group_ptr->np * sizeof(int) ), MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname ); } (void) memset( group_ptr->set_mark, (char)0, group_ptr->np * sizeof(int) ); /* Mark the ranks to be excluded */ np = group_ptr->np; for (i=0; i<n; i++) { first = ranges[i][0]; last = ranges[i][1]; stride = ranges[i][2]; if (stride != 0) { if ( (stride > 0 && first > last) || (stride < 0 && first < last) ) { mpi_errno = MPIR_Err_setmsg( MPI_ERR_ARG, MPIR_ERR_ARG_STRIDE, myname, "Range does not terminate", "Range (%d,%d,%d) does not terminate", first, last, stride ); return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno, myname ); } for ( j=first; j*stride <= last*stride; j += stride ) if ( (j < group_ptr->np) && (j >= 0) ) { if (group_ptr->set_mark[j] == MPIR_UNMARKED) { group_ptr->set_mark[j] = MPIR_MARKED; np--; } } else{ mpi_errno = MPIR_Err_setmsg( MPI_ERR_RANK, MPIR_ERR_DEFAULT, myname, (char *)0,(char *)0, j ); return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno, myname ); } } else { mpi_errno = MPIR_Err_setmsg( MPI_ERR_ARG, MPIR_ERR_ARG_ZERO_STRIDE, myname, "Zero stride is incorrect", (char *)0 ); return MPIR_ERROR( MPIR_COMM_WORLD, mpi_errno, myname ); } } /* Check np to see if we have original group or if we have null group */ if (np == 0) { MPIR_Group_dup ( MPIR_GROUP_EMPTY, &new_group_ptr ); *newgroup = new_group_ptr->self; return (mpi_errno); } if (np == group_ptr->np) { MPIR_Group_dup ( group_ptr, &new_group_ptr ); *newgroup = new_group_ptr->self; return (mpi_errno); } /* Create the new group */ MPIR_ALLOC(new_group_ptr,NEW(struct MPIR_GROUP),MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname ); *newgroup = (MPI_Group) MPIR_FromPointer( new_group_ptr ); new_group_ptr->self = *newgroup; MPIR_SET_COOKIE(new_group_ptr,MPIR_GROUP_COOKIE) new_group_ptr->ref_count = 1; new_group_ptr->permanent = 0; new_group_ptr->local_rank = MPI_UNDEFINED; new_group_ptr->set_mark = (int *)0; new_group_ptr->np = np; new_group_ptr->lrank_to_grank = (int *) MALLOC( np * sizeof(int) ); if (!new_group_ptr->lrank_to_grank) { return MPIR_ERROR( MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname ); } /* Fill in new group */ for (i=j=0; i < group_ptr->np ; i++) if ( (group_ptr->set_mark[i] == MPIR_UNMARKED) && (j < new_group_ptr->np ) ) { if (group_ptr->local_rank == i) new_group_ptr->local_rank = j; new_group_ptr->lrank_to_grank[j++] = group_ptr->lrank_to_grank[i]; } /* Determine the previous and next powers of 2 */ MPIR_Powers_of_2 ( new_group_ptr->np, &(new_group_ptr->N2_next), &(new_group_ptr->N2_prev) ); TR_POP; return (mpi_errno); }