#include <stdlib.h>
#include "mpi.h"
#include "mpitest.h"    /* MTest_Init, MTest_Finalize, MTEST_HAVE_MIN_MPI_VERSION */

int main(int argc, char *argv[])
{
    int errs = 0;
#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    int i;
    int *inbuf = NULL;
    int *inoutbuf = NULL;
    int count = -1;
    MPI_Op uop = MPI_OP_NULL;
#endif

    MTest_Init(&argc, &argv);

#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    /* this function was added in MPI-2.2 */

    inbuf = malloc(sizeof(int) * MAX_BUF_ELEMENTS);
    inoutbuf = malloc(sizeof(int) * MAX_BUF_ELEMENTS);

    /* built-in op: after the reduction inoutbuf[i] must hold i + i */
    for (count = 0; count < MAX_BUF_ELEMENTS; count > 0 ? count *= 2 : count++) {
        for (i = 0; i < count; ++i) {
            inbuf[i] = i;
            inoutbuf[i] = i;
        }
        MPI_Reduce_local(inbuf, inoutbuf, count, MPI_INT, MPI_SUM);
        for (i = 0; i < count; ++i) {
            if (inbuf[i] != i)
                ++errs;
            if (inoutbuf[i] != (2 * i))
                ++errs;
        }
    }

    /* make sure that user-defined ops work too */
    MPI_Op_create(&user_op, 0 /*!commute*/, &uop);
    for (count = 0; count < MAX_BUF_ELEMENTS; count > 0 ? count *= 2 : count++) {
        for (i = 0; i < count; ++i) {
            inbuf[i] = i;
            inoutbuf[i] = i;
        }
        MPI_Reduce_local(inbuf, inoutbuf, count, MPI_INT, uop);
        errs += uop_errs;
        for (i = 0; i < count; ++i) {
            if (inbuf[i] != i)
                ++errs;
            if (inoutbuf[i] != (3 * i))
                ++errs;
        }
    }
    MPI_Op_free(&uop);

    free(inbuf);
    free(inoutbuf);
#endif

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
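The test above uses MAX_BUF_ELEMENTS, uop_errs and user_op without showing their definitions (in a real file they would sit above main). A minimal sketch of what they could look like, stated purely as an assumption: the buffer size is a guess, and the op body is reverse-engineered from the check, which expects 3*i when both buffers start out as i, i.e. inout = 2*in + inout.

#define MAX_BUF_ELEMENTS (65000)    /* assumed upper bound on the test buffer size */

static int uop_errs = 0;            /* bumped by user_op when it sees unexpected input */

static void user_op(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype)
{
    int *in = (int *) invec;
    int *inout = (int *) inoutvec;
    int i;

    if (*datatype != MPI_INT) {
        ++uop_errs;                 /* the test only applies this op to MPI_INT */
        return;
    }
    for (i = 0; i < *len; ++i)
        inout[i] = 2 * in[i] + inout[i];    /* yields 3*i when both inputs are i */
}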
/*
 * I didn't find a non-linear version of the scan function in Open MPI in
 * either the basic, tuned or hierarch component.  Is it even possible given
 * the collective?
 */
int MPI_Scan_linear(void *sbuf, void *rbuf, int cnt, MPI_Datatype dt,
                    MPI_Op op, MPI_Comm comm)
{
    int rc = MPI_SUCCESS, extent, rank, size;

    MPI_CHECK(rc = MPI_Comm_size(comm, &size));
    MPI_CHECK(rc = MPI_Comm_rank(comm, &rank));
    /* "extent" is really the type size in bytes, which is what the memmove needs */
    MPI_CHECK(rc = MPI_Type_size(dt, &extent));

    if (rank == 0) {
        memmove(rbuf, sbuf, cnt * extent);
        if (size > 1) {
            MPI_CHECK(rc = MPI_Send(rbuf, cnt, dt, 1, SCAN_TAG, comm));
        }
    } else {
        /* rbuf now holds the partial scan of ranks 0 .. rank-1 */
        MPI_CHECK(rc = MPI_Recv(rbuf, cnt, dt, rank - 1, SCAN_TAG, comm,
                                MPI_STATUS_IGNORE));
        /* MPI_Reduce_local computes rbuf = sbuf op rbuf; note that for
         * non-commutative ops MPI_Scan requires the opposite operand order */
        MPI_CHECK(rc = MPI_Reduce_local(sbuf, rbuf, cnt, dt, op));
        if (rank != (size - 1)) {
            MPI_CHECK(rc = MPI_Send(rbuf, cnt, dt, rank + 1, SCAN_TAG, comm));
        }
    }
    return rc;
}
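MPI_Scan_linear above and MPI_Reduce_linear further down rely on SCAN_TAG, REDUCE_TAG, MPI_CHECK and NULL_CHECK, which the snippets do not define. A minimal sketch of plausible definitions, given only as an assumption so the fragments are self-contained; the real project may use different tag values and propagate errors instead of aborting.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SCAN_TAG   1000     /* hypothetical tag values */
#define REDUCE_TAG 1001

/* abort on a non-MPI_SUCCESS return code */
#define MPI_CHECK(call)                                                   \
    do {                                                                  \
        if ((call) != MPI_SUCCESS) {                                      \
            fprintf(stderr, "MPI error at %s:%d\n", __FILE__, __LINE__);  \
            MPI_Abort(MPI_COMM_WORLD, 1);                                 \
        }                                                                 \
    } while (0)

/* abort when an allocation returns NULL */
#define NULL_CHECK(expr)                                                  \
    do {                                                                  \
        if ((expr) == NULL) {                                             \
            fprintf(stderr, "allocation failed at %s:%d\n",               \
                    __FILE__, __LINE__);                                  \
            MPI_Abort(MPI_COMM_WORLD, 1);                                 \
        }                                                                 \
    } while (0)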
int MPIR_Reduce_local_cdesc(CFI_cdesc_t *x0, CFI_cdesc_t *x1, int x2,
                            MPI_Datatype x3, MPI_Op x4)
{
    int err = MPI_SUCCESS;
    void *buf0 = x0->base_addr;
    void *buf1 = x1->base_addr;
    int count1 = x2;
    MPI_Datatype dtype1 = x3;

    if (buf0 == &MPIR_F08_MPI_BOTTOM) {
        buf0 = MPI_BOTTOM;
    }
    if (buf1 == &MPIR_F08_MPI_BOTTOM) {
        buf1 = MPI_BOTTOM;
    }

    /* Non-contiguous Fortran array section: build a matching derived
     * datatype and pass it with count 1. */
    if (x1->rank != 0 && !CFI_is_contiguous(x1)) {
        err = cdesc_create_datatype(x1, x2, x3, &dtype1);
        count1 = 1;
    }

    err = MPI_Reduce_local(buf0, buf1, count1, dtype1, x4);

    if (dtype1 != x3)
        MPI_Type_free(&dtype1);

    return err;
}
void mpi_reduce_local_f(char *inbuf, char *inoutbuf, MPI_Fint *count,
                        MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr)
{
    MPI_Datatype c_type;
    MPI_Op c_op;

    c_type = MPI_Type_f2c(*datatype);
    c_op = MPI_Op_f2c(*op);

    inbuf = (char *) OMPI_F2C_BOTTOM(inbuf);
    inoutbuf = (char *) OMPI_F2C_BOTTOM(inoutbuf);

    *ierr = OMPI_INT_2_FINT(MPI_Reduce_local(inbuf, inoutbuf,
                                             OMPI_FINT_2_INT(*count),
                                             c_type, c_op));
}
/*
 * Class:     mpi_Intracomm
 * Method:    Reduce_local
 * Signature: (Ljava/lang/Object;ILjava/lang/Object;IILmpi/Datatype;Lmpi/Op;I)V
 */
JNIEXPORT void JNICALL Java_mpi_Intracomm_reduce_1local(JNIEnv *env, jobject jthis,
                                                        jobject inbuf, jobject inoutbuf,
                                                        jint count, jobject type, jobject op)
{
    MPI_Datatype mpi_type =
        (MPI_Datatype) ((*env)->GetLongField(env, type, ompi_java.DatatypehandleID));
    int baseType = (*env)->GetIntField(env, type, ompi_java.DatatypebaseTypeID);
    void *inptr, *inoutptr = NULL;
    void *inbase, *inoutbase;

    ompi_java_clearFreeList(env);

    inptr = ompi_java_getBufPtr(&inbase, env, inbuf, baseType, 0);
    inoutptr = ompi_java_getBufPtr(&inoutbase, env, inoutbuf, baseType, 0);

    MPI_Reduce_local(inptr, inoutptr, count, mpi_type,
                     (MPI_Op) ((*env)->GetLongField(env, op, ompi_java.OphandleID)));

    ompi_java_releaseBufPtr(env, inbuf, inbase, baseType);
    ompi_java_releaseBufPtr(env, inoutbuf, inoutbase, baseType);
}
FORT_DLL_SPEC void FORT_CALL mpi_reduce_local_(void *v1, void *v2, MPI_Fint *v3,
                                               MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *ierr)
{
    *ierr = MPI_Reduce_local(v1, v2, (int) *v3, (MPI_Datatype) (*v4), (MPI_Op) *v5);
}
int MPI_Reduce_linear(void *sbuf, void *rbuf, int cnt, MPI_Datatype dt,
                      MPI_Op op, int root, MPI_Comm comm)
{
    int i, rank, rc, size;
    MPI_Aint lb, extent;
    char *free_buf = NULL, *pml_buf = NULL, *inplace_temp = NULL;
    char *inbuf = NULL;

    MPI_CHECK(rc = MPI_Comm_rank(comm, &rank));
    MPI_CHECK(rc = MPI_Comm_size(comm, &size));

    if (root == MPI_PROC_NULL) {
        return MPI_SUCCESS;
    } else if (root != rank) {
        /* non-root ranks just send their contribution to the root */
        MPI_CHECK(rc = MPI_Send(sbuf, cnt, dt, root, REDUCE_TAG, comm));
        return rc;
    }

    MPI_CHECK(rc = MPI_Type_get_extent(dt, &lb, &extent));

    if (sbuf == MPI_IN_PLACE) {
        sbuf = rbuf;
        NULL_CHECK(inplace_temp = malloc(cnt * extent));
        rbuf = inplace_temp - lb;
    }

    if (size > 1) {
        NULL_CHECK(free_buf = malloc(extent * cnt));
        pml_buf = free_buf - lb;
    }

    /* seed the accumulator with the highest rank's contribution */
    if (rank == (size - 1)) {
        memcpy(rbuf, sbuf, extent * cnt);
    } else {
        MPI_CHECK(rc = MPI_Recv(rbuf, cnt, dt, size - 1, REDUCE_TAG, comm,
                                MPI_STATUS_IGNORE));
    }

    /*
     * Loop receiving and reducing the remaining contributions in rank order
     * (size-2 down to 0); only a subset of ops may be supported.
     */
    for (i = size - 2; i >= 0; i--) {
        if (rank == i) {
            inbuf = sbuf;
        } else {
            MPI_CHECK(rc = MPI_Recv(pml_buf, cnt, dt, i, REDUCE_TAG, comm,
                                    MPI_STATUS_IGNORE));
            inbuf = pml_buf;
        }
        MPI_CHECK(rc = MPI_Reduce_local(inbuf, rbuf, cnt, dt, op));
    }

    if (inplace_temp) {
        memcpy(sbuf, inplace_temp, extent * cnt);
        free(inplace_temp);
    }
    if (free_buf != NULL)
        free(free_buf);

    return rc;
}
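A short, hypothetical driver showing how the two hand-rolled collectives above would be invoked; it assumes the macro sketch given earlier and exists only to illustrate the calling convention.

int main(int argc, char **argv)
{
    int rank, size, in, scan_out = 0, red_out = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    in = rank + 1;    /* rank r contributes r + 1 */

    /* inclusive prefix sum: rank r ends up with 1 + 2 + ... + (r + 1) */
    MPI_Scan_linear(&in, &scan_out, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    printf("rank %d: scan = %d\n", rank, scan_out);

    /* full reduction onto rank 0: size * (size + 1) / 2 */
    MPI_Reduce_linear(&in, &red_out, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0)
        printf("rank 0: reduce = %d (expected %d)\n", red_out, size * (size + 1) / 2);

    MPI_Finalize();
    return 0;
}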
int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
               MPI_Op op, MPI_Comm comm)
{
    if (!comm.is_ep) {
        /* plain MPI communicator: defer to the library's Exscan */
        return ::MPI_Exscan(sendbuf, recvbuf, count, to_mpi_type(datatype),
                            to_mpi_op(op), to_mpi_comm(comm.mpi_comm));
    }

    valid_type(datatype);

    int ep_rank     = comm.ep_comm_ptr->size_rank_info[0].first;
    int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;
    int mpi_rank    = comm.ep_comm_ptr->size_rank_info[2].first;
    int ep_size     = comm.ep_comm_ptr->size_rank_info[0].second;
    int num_ep      = comm.ep_comm_ptr->size_rank_info[1].second;
    int mpi_size    = comm.ep_comm_ptr->size_rank_info[2].second;

    ::MPI_Aint datasize, lb;
    ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize);

    /* temporary buffer of datasize*count bytes, allocated as a char array so
     * that delete[] is well defined */
    char *tmp_sendbuf = new char[datasize * count];

    /* my_map[i] counts the endpoints hosted by MPI process i; my_src/my_dst
     * are the endpoints this rank exchanges its contribution with before the
     * process-level exscan */
    int my_src = 0;
    int my_dst = ep_rank;

    std::vector<int> my_map(mpi_size, 0);

    for (int i = 0; i < comm.rank_map->size(); i++)
        my_map[comm.rank_map->at(i).second]++;

    for (int i = 0; i < mpi_rank; i++)
        my_src += my_map[i];
    my_src += ep_rank_loc;

    for (int i = 0; i < mpi_size; i++) {
        if (my_dst < my_map[i]) {
            my_dst = get_ep_rank(comm, my_dst, i);
            break;
        } else {
            my_dst -= my_map[i];
        }
    }

    if (ep_rank != my_dst) {
        MPI_Request request[2];
        MPI_Status status[2];

        MPI_Isend(sendbuf, count, datatype, my_dst, my_dst, comm, &request[0]);
        MPI_Irecv(tmp_sendbuf, count, datatype, my_src, ep_rank, comm, &request[1]);
        MPI_Waitall(2, request, status);
    } else {
        memcpy(tmp_sendbuf, sendbuf, datasize * count);
    }

    char *tmp_recvbuf = new char[datasize * count];

    /* note: this is the EP library's own Reduce_local variant (extra root and
     * communicator arguments), not the standard 5-argument MPI_Reduce_local */
    MPI_Reduce_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, 0, comm);

    if (ep_rank_loc == 0)
        ::MPI_Exscan(MPI_IN_PLACE, tmp_recvbuf, count, to_mpi_type(datatype),
                     to_mpi_op(op), to_mpi_comm(comm.mpi_comm));

    MPI_Exscan_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, comm);

    if (ep_rank != my_src) {
        MPI_Request request[2];
        MPI_Status status[2];

        MPI_Isend(tmp_recvbuf, count, datatype, my_src, my_src, comm, &request[0]);
        MPI_Irecv(recvbuf, count, datatype, my_dst, ep_rank, comm, &request[1]);
        MPI_Waitall(2, request, status);
    } else {
        memcpy(recvbuf, tmp_recvbuf, datasize * count);
    }

    delete[] tmp_sendbuf;
    delete[] tmp_recvbuf;

    return MPI_SUCCESS;
}