/** One-sided accumulate operation with typed arguments.  Source buffer must be private.
  *
  * @param[in] mreg      Memory region
  * @param[in] src       Address of source data
  * @param[in] src_count Number of elements of the given type at the source
  * @param[in] src_type  MPI datatype of the source elements
  * @param[in] dst       Address of destination buffer
  * @param[in] dst_count Number of elements of the given type at the destination
  * @param[in] dst_type  MPI datatype of the destination elements
  * @param[in] proc      Absolute process id of target process
  * @return              0 on success, non-zero on failure
  */
int gmr_accumulate_typed(gmr_t *mreg, void *src, int src_count, MPI_Datatype src_type,
                         void *dst, int dst_count, MPI_Datatype dst_type, int proc) {
  int        grp_proc;
  gmr_size_t disp;
  MPI_Aint   lb, extent;

  grp_proc = ARMCII_Translate_absolute_to_group(&mreg->group, proc);
  ARMCII_Assert(grp_proc >= 0);

  // Calculate displacement from beginning of the window
  if (dst == MPI_BOTTOM)
    disp = 0;
  else
    disp = (gmr_size_t) ((uint8_t*)dst - (uint8_t*)mreg->slices[proc].base);

  // Perform checks
  MPI_Type_get_true_extent(dst_type, &lb, &extent);
  ARMCII_Assert(mreg->lock_state != GMR_LOCK_UNLOCKED);
  ARMCII_Assert_msg(disp >= 0 && disp < mreg->slices[proc].size, "Invalid remote address");
  ARMCII_Assert_msg(disp + dst_count*extent <= mreg->slices[proc].size, "Transfer is out of range");

  MPI_Accumulate(src, src_count, src_type, grp_proc, (MPI_Aint) disp,
                 dst_count, dst_type, MPI_SUM, mreg->window);

  return 0;
}
/* execute a purely local sort */
int DTCMP_Sort_local_mergesort(
  const void* inbuf,
  void* outbuf,
  int count,
  MPI_Datatype key,
  MPI_Datatype keysat,
  DTCMP_Op cmp,
  DTCMP_Flags hints)
{
  int rc = DTCMP_SUCCESS;

  MPI_Aint lb, extent;
  MPI_Type_get_true_extent(keysat, &lb, &extent);

  if (count > 0 && extent > 0) {
    /* copy data to outbuf if it's not already there */
    if (inbuf != DTCMP_IN_PLACE) {
      DTCMP_Memcpy(outbuf, count, keysat, inbuf, count, keysat);
    }

    /* allocate scratch space */
    void* scratch = dtcmp_malloc(count * extent, 0, __FILE__, __LINE__);

    /* execute our merge sort */
    size_t size = (size_t) extent;
    rc = dtcmp_sort_local_mergesort_scratch(outbuf, scratch, count, size, cmp, hints);

    /* free scratch space */
    dtcmp_free(&scratch);
  }

  return rc;
}
/* Extract an m x n submatrix within an m x N matrix and transpose it.
   Assume storage by rows; the defined datatype accesses by columns */
MPI_Datatype transpose_type(int N, int m, int n, MPI_Datatype type)
/* computes a datatype for the transpose of an m x n matrix
   with entries of the given type */
{
    MPI_Datatype subrow, subrow1, submatrix;
    MPI_Aint lb, extent;

    MPI_Type_vector(m, 1, N, type, &subrow);
    MPI_Type_get_extent(type, &lb, &extent);
    MPI_Type_create_resized(subrow, 0, extent, &subrow1);
    MPI_Type_contiguous(n, subrow1, &submatrix);
    MPI_Type_commit(&submatrix);

    MPI_Type_free(&subrow);
    MPI_Type_free(&subrow1);

    /* Consistency test: the size of submatrix should be n * m * sizeof(type)
       and the true extent should be ((m-1)*N + n) * sizeof(type) */
    {
        int tsize;
        MPI_Aint textent, llb;
        MPI_Type_size(type, &tsize);
        MPI_Type_get_true_extent(submatrix, &llb, &textent);

        if (textent != tsize * (N * (m-1) + n)) {
            fprintf(stderr, "Transpose Submatrix extent is %ld, expected %ld (%d,%d,%d)\n",
                    (long)textent, (long)(tsize * (N * (m-1) + n)), N, n, m);
        }
    }

    return submatrix;
}
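/* A hedged usage sketch (not from the original source): assuming the
 * transpose_type() above is in scope, rank 0 sends the transpose of the
 * leading m x n block of a row-major m x N int matrix, and rank 1 receives
 * it as n*m contiguous ints, i.e. the block laid out column by column. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) { MPI_Finalize(); return 0; }   /* needs two ranks */

    if (rank == 0) {
        int a[2][4] = { {0, 1, 2, 3},
                        {4, 5, 6, 7} };            /* m = 2, N = 4 */
        MPI_Datatype t = transpose_type(4, 2, 3, MPI_INT);
        MPI_Send(a, 1, t, 1, 0, MPI_COMM_WORLD);   /* sends 0 4 1 5 2 6 */
        MPI_Type_free(&t);
    } else if (rank == 1) {
        int b[6];
        MPI_Recv(b, 6, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for (int i = 0; i < 6; i++) printf("%d ", b[i]);
        printf("\n");
    }

    MPI_Finalize();
    return 0;
}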
void print_pictogram(std::ostream& os, unsigned int width = 8) {
    // note: `type` (the MPI datatype) and `m` (a displacement -> datatype map)
    // are evidently members of the enclosing class
    std::pair<MPI_Aint, MPI_Aint> ext;
    std::pair<MPI_Aint, MPI_Aint> true_ext;
    MPI_Type_get_extent(type, &ext.first, &ext.second);
    MPI_Type_get_true_extent(type, &true_ext.first, &true_ext.second);

    if (ext.first != 0) {
        os << "Pictogram not available for types with lb != 0" << std::endl;
        return;
    }
    MPI_Aint ex = ext.second;
    if (ext.second < true_ext.second) {
        os << "Pictogram not available for types with ub < true_ub" << std::endl;
        return;
    }

    // use single letter type
    unsigned int pos = 0;
    os << "[";
    for (unsigned int i = 0; i < width; ++i) {
        os << "-";
    }
    os << "]" << std::endl << "[";

    for (auto it = m.begin(); it != m.end(); ++it) {
        while (pos < it->first) {
            if (pos % width == 0 && pos != 0)
                os << "]" << std::endl << "[";
            os << " ";
            ++pos;
        }
        // get type size and type char
        char type_char = builtin_typename_map::get_typeid_name(it->second)[0];
        int size;
        MPI_Type_size(it->second, &size);
        // print the character `size` times
        for (int i = 0; i < size; ++i) {
            if (pos % width == 0 && pos != 0)
                os << "]" << std::endl << "[";
            os << type_char;
            ++pos;
        }
    }
    while (pos < ex) {
        if (pos % width == 0 && pos != 0)
            os << "]" << std::endl << "[";
        os << " ";
        ++pos;
    }
    os << "]" << std::endl << "[";
    for (unsigned int i = 0; i < width; ++i) {
        os << "-";
    }
    os << "]" << std::endl;
}
void mpi_type_get_true_extent_f(MPI_Fint *datatype, MPI_Aint *true_lb,
                                MPI_Aint *true_extent, MPI_Fint *ierr)
{
    MPI_Datatype c_type = MPI_Type_f2c(*datatype);

    *ierr = OMPI_INT_2_FINT(MPI_Type_get_true_extent(c_type, true_lb, true_extent));
}
JNIEXPORT void JNICALL Java_mpi_Datatype_getTrueLbExtent(
        JNIEnv *env, jobject jthis, jlong type, jintArray jLbExt)
{
    MPI_Aint lb, extent;
    int rc = MPI_Type_get_true_extent((MPI_Datatype)type, &lb, &extent);

    if (ompi_java_exceptionCheck(env, rc))
        return;

    jint *lbExt = (*env)->GetIntArrayElements(env, jLbExt, NULL);
    /* note: narrowing MPI_Aint to jint; values beyond 32 bits are truncated */
    lbExt[0] = (jint)lb;
    lbExt[1] = (jint)extent;
    (*env)->ReleaseIntArrayElements(env, jLbExt, lbExt, 0);
}
/* zmpi_func zmpil_simple_create_derived */
int zmpil_simple_create_derived(zmpil_simple_t *mpil, MPI_Datatype type, int count)
{
/*  mpil->type = type;*/

  Z_TRACE("zmpil_simple_create_derived");

  MPI_Type_get_true_extent(type, &mpil->true_lb, &mpil->true_extent);
  mpil->true_extent *= count;

  Z_ASSERT(mpil->true_lb == 0);

  return 0;
}
int MPIU_datatype_full_size(MPI_Datatype datatype, MPI_Aint *size)
{
    int error_code = MPI_SUCCESS;
    MPI_Aint true_extent = 0;
    MPI_Aint true_lb = 0;

    error_code = MPI_Type_get_true_extent(datatype, &true_lb, &true_extent);
    if (error_code != MPI_SUCCESS)
        goto fn_exit;

    *size = true_extent;

fn_exit:
    return error_code;
}
int DTCMP_Sort_allgather(
  const void* inbuf,
  void* outbuf,
  int count,
  MPI_Datatype key,
  MPI_Datatype keysat,
  DTCMP_Op cmp,
  DTCMP_Flags hints,
  MPI_Comm comm)
{
  /* get our rank and the number of ranks in this comm */
  int rank, ranks;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &ranks);

  /* compute total number of items that we'll collect */
  int total_count = count * ranks;

  /* get true extent of keysat type */
  MPI_Aint true_lb, true_extent;
  MPI_Type_get_true_extent(keysat, &true_lb, &true_extent);

  /* allocate space to hold all items from all procs */
  size_t buf_size = total_count * true_extent;
  if (buf_size > 0) {
    char* buf = (char*) dtcmp_malloc(buf_size, 0, __FILE__, __LINE__);

    /* gather all items, send from outbuf if IN_PLACE is specified */
    void* sendbuf = (void*) inbuf;
    if (inbuf == DTCMP_IN_PLACE) {
      sendbuf = outbuf;
    }
    char* recvbuf = buf - true_lb;
    MPI_Allgather(sendbuf, count, keysat, (void*)recvbuf, count, keysat, comm);

    /* sort items with local sort */
    DTCMP_Sort_local(DTCMP_IN_PLACE, recvbuf, total_count, key, keysat, cmp, hints);

    /* copy our items into outbuf */
    char* mybuf = recvbuf + count * rank * true_extent;
    DTCMP_Memcpy(outbuf, count, keysat, (void*)mybuf, count, keysat);

    /* free off our temporary buffers */
    dtcmp_free(&buf);
  }

  return DTCMP_SUCCESS;
}
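/* A hedged standalone sketch (not from the original source) of the
 * "buf - true_lb" idiom used above: for a type whose data starts at a
 * nonzero displacement, MPI touches bytes starting at addr + true_lb, so
 * offsetting the allocation by -true_lb makes those accesses land exactly
 * inside the true_extent bytes actually allocated. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    /* a struct type with one int at displacement 4: true_lb = 4, true_extent = 4 */
    int blocklen = 1;
    MPI_Aint disp = 4;
    MPI_Datatype base = MPI_INT, shifted;
    MPI_Type_create_struct(1, &blocklen, &disp, &base, &shifted);
    MPI_Type_commit(&shifted);

    MPI_Aint true_lb, true_extent;
    MPI_Type_get_true_extent(shifted, &true_lb, &true_extent);

    /* allocate only the bytes the data occupies ... */
    char* buf = (char*) malloc(true_extent);
    /* ... and shift the base pointer so displacement true_lb lands at buf[0] */
    char* base_ptr = buf - true_lb;

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Sendrecv(&rank, 1, MPI_INT, 0, 0,
                 base_ptr, 1, shifted, 0, 0,
                 MPI_COMM_SELF, MPI_STATUS_IGNORE);
    printf("received %d via shifted type\n", *(int*)buf);

    free(buf);
    MPI_Type_free(&shifted);
    MPI_Finalize();
    return 0;
}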
void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag)
{
    MPIR_Datatype_iscontig(datatype, flag);

    /* if it is MPICH2 and the datatype is reported as contiguous,
       check if the true_lb is non-zero, and if so, mark the datatype
       as noncontiguous */
#ifdef MPICH2
    if (*flag) {
        MPI_Aint true_extent, true_lb;
        MPI_Type_get_true_extent(datatype, &true_lb, &true_extent);
        if (true_lb > 0)
            *flag = 0;
    }
#endif
}
/*@C
   PetscSFGetWindow - Get a window for use with a given data type

   Collective on PetscSF

   Input Arguments:
+  sf - star forest
.  unit - data type
.  array - array to be sent
.  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
.  fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_WINDOW_SYNC_FENCE
.  postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_WINDOW_SYNC_ACTIVE
-  startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_WINDOW_SYNC_ACTIVE

   Output Arguments:
.  win - window

   Level: developer

   Developer Notes:
   This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
   reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
   whichever one is available, by copying the array into it if necessary.

.seealso: PetscSFGetRanks(), PetscSFWindowGetDataTypes()
@*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Aint       lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;

  PetscFunctionBegin;
  ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRQ(ierr);
  ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRQ(ierr);
  if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write [email protected] if you want this feature");
  if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write [email protected] if you want this feature");
  ierr = PetscMalloc(sizeof(*link),&link);CHKERRQ(ierr);

  link->bytes = bytes;
  link->addr  = array;

  ierr = MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr);

  link->epoch = epoch;
  link->next  = w->wins;
  link->inuse = PETSC_TRUE;
  w->wins     = link;
  *win        = link->win;

  if (epoch) {
    switch (w->sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group ingroup,outgroup;
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRQ(ierr);
      ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRQ(ierr);
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
/* determine whether type is contiguous, has a true lower bound of 0,
 * and extent == true_extent */
static int dtcmp_type_is_valid(MPI_Datatype type)
{
  /* get (user-defined) lower bound and extent */
  MPI_Aint lb, extent;
  MPI_Type_get_extent(type, &lb, &extent);

  /* get true lower bound and extent */
  MPI_Aint true_lb, true_extent;
  MPI_Type_get_true_extent(type, &true_lb, &true_extent);

  /* get size of type */
  int size;
  MPI_Type_size(type, &size);

  /* check that type is contiguous (size == true_extent ==> no holes) */
  if (size != true_extent) {
    return 0;
  }

  /* check that lower bounds are 0 */
  if (lb != 0 || true_lb != 0) {
    return 0;
  }

  /* check that extent == true_extent ==> no funny business if we
   * concatenate a series of these types */
  if (extent != true_extent) {
    return 0;
  }

  /* check that extent is positive */
  if (extent <= 0) {
    return 0;
  }

  return 1;
}
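/* A minimal standalone sketch (not part of DTCMP) showing a type the check
 * above would reject: resizing MPI_INT to twice its extent leaves size and
 * true extent at sizeof(int) but doubles the (user-defined) extent, so the
 * extent == true_extent test fails. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    MPI_Datatype padded;
    MPI_Type_create_resized(MPI_INT, 0, 2 * (MPI_Aint)sizeof(int), &padded);
    MPI_Type_commit(&padded);

    MPI_Aint lb, extent, true_lb, true_extent;
    int size;
    MPI_Type_get_extent(padded, &lb, &extent);
    MPI_Type_get_true_extent(padded, &true_lb, &true_extent);
    MPI_Type_size(padded, &size);

    /* prints size=4 extent=8 true_extent=4 on typical platforms */
    printf("size=%d extent=%ld true_extent=%ld\n",
           size, (long)extent, (long)true_extent);

    MPI_Type_free(&padded);
    MPI_Finalize();
    return 0;
}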
/* check whether all items in buf are already in sorted order */
int DTCMP_Is_sorted(
  const void* buf,
  int count,
  MPI_Datatype key,
  MPI_Datatype keysat,
  DTCMP_Op cmp,
  DTCMP_Flags hints,
  MPI_Comm comm,
  int* flag)
{
  int rc = DTCMP_SUCCESS;

  /* assume that items are globally sorted,
   * we'll set this to 0 if we find otherwise */
  int sorted = 1;

  /* get our rank and the number of ranks in the communicator */
  int rank, ranks;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &ranks);

  /* first, step through and check that all of our local items are in order */
  DTCMP_Is_sorted_local(buf, count, key, keysat, cmp, hints, &sorted);

  /* bail out at this point if ranks == 1 */
  if (ranks <= 1) {
    *flag = sorted;
    return DTCMP_SUCCESS;
  }

  /* get extent of keysat */
  MPI_Aint lb, extent;
  MPI_Type_get_extent(keysat, &lb, &extent);

  /* get true extent of key */
  MPI_Aint key_true_lb, key_true_extent;
  MPI_Type_get_true_extent(key, &key_true_lb, &key_true_extent);

  /* TODO: if we know that each proc has an item,
   * we could just do a single pt2pt send to the rank one higher,
   * compare, then allreduce, and thereby avoid the type/op creation
   * and scan that follows */

  /* allocate type for scan, one int to say whether key is valid,
   * and our largest key */
  size_t item_size = sizeof(int) + key_true_extent;
  char* sendbuf = dtcmp_malloc(item_size, 0, __FILE__, __LINE__);
  char* recvbuf = dtcmp_malloc(item_size, 0, __FILE__, __LINE__);

  /* copy our largest item into our send buffer,
   * set valid flag to 1 if we have a value */
  int* valid = (int*) sendbuf;
  void* value = (void*) (sendbuf + sizeof(int));
  if (count > 0) {
    *valid = 1;

    /* get pointer to largest element in our buffer,
     * and copy it to our send buffer */
    const void* lastitem = (const void*) ((const char*)buf + (count - 1) * extent);
    DTCMP_Memcpy(value, 1, key, lastitem, 1, key);
  } else {
    /* we don't have any items, so set valid flag to 0 */
    *valid = 0;
  }

  /* create and commit type that consists of leading int followed by key */
  MPI_Datatype validtype;
  dtcmp_type_concat2(MPI_INT, key, &validtype);

  /* create user-defined reduction operation to copy key if it's valid */
  MPI_Op validop;
  MPI_Op_create(copy_key_if_valid, 0, &validop);

  /* execute scan to get key from next process to our left (that has an item) */
  MPI_Exscan(sendbuf, recvbuf, 1, validtype, validop, comm);

  /* free our user-defined reduction op and datatype */
  MPI_Op_free(&validop);
  MPI_Type_free(&validtype);

  /* compare our smallest item to the received item */
  if (count > 0 && rank > 0) {
    int recvvalid = *(int*) recvbuf;
    if (recvvalid) {
      const void* recvkey = (const void*) (recvbuf + sizeof(int));
      if (dtcmp_op_eval(recvkey, buf, cmp) > 0) {
        sorted = 0;
      }
    }
  }

  /* allreduce to determine whether all items are in order */
  int all_sorted;
  MPI_Allreduce(&sorted, &all_sorted, 1, MPI_INT, MPI_LAND, comm);

  /* free the scratch space */
  dtcmp_free(&recvbuf);
  dtcmp_free(&sendbuf);

  /* set caller's output flag and return */
  *flag = all_sorted;
  return rc;
}
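/* Hypothetical sketch (an assumption, not the DTCMP source) of what the
 * copy_key_if_valid reduction used above could look like. Each element is
 * an int flag followed by a key; for a non-commutative op (hence the 0 in
 * MPI_Op_create), MPI combines left-to-right in rank order, so keeping the
 * right operand when it is valid and falling back to the left one yields
 * "key of the nearest earlier rank that has an item" under MPI_Exscan. */
#include <mpi.h>
#include <string.h>

static void copy_key_if_valid(void* invec, void* inoutvec, int* len, MPI_Datatype* type)
{
    /* recover the stride of one (flag, key) element from the datatype;
     * in the Exscan above *len is 1, so the stride is only used once */
    MPI_Aint lb, extent;
    MPI_Type_get_extent(*type, &lb, &extent);

    char* in    = (char*) invec;     /* operand from the lower-ranked side */
    char* inout = (char*) inoutvec;  /* operand from the higher-ranked side */
    for (int i = 0; i < *len; i++) {
        /* if the later operand carries no valid key, inherit the earlier one */
        if (*(int*)inout == 0) {
            memcpy(inout, in, extent);
        }
        in    += extent;
        inout += extent;
    }
}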
int MPICH_AlltoAll_short(
        void *sendbuf, int sendcount, MPI_Datatype sendtype,
        void *recvbuf, int recvcount, MPI_Datatype recvtype,
        MPI_Comm comm)
{
    int comm_size, i, pof2;
    MPI_Aint sendtype_extent, recvtype_extent;
    int mpi_errno=MPI_SUCCESS, src, dst, rank, nbytes;
    void *tmp_buf;
    int sendtype_size, pack_size, block, position, *displs, count;
    MPI_Datatype newtype;
    MPI_Aint recvtype_true_extent, recvbuf_extent, recvtype_true_lb;

    if (sendcount == 0) return MPI_SUCCESS;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &comm_size);

    /* Get extent of send and recv types */
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
    MPID_Datatype_get_size_macro(sendtype, sendtype_size);
    nbytes = sendtype_size * sendcount;

    /* use the indexing algorithm by Jehoshua Bruck et al.,
     * IEEE TPDS, Nov. 97 */

    /* allocate temporary buffer */
    MPI_Pack_size(recvcount*comm_size, recvtype, comm, &pack_size);
    tmp_buf = malloc(pack_size);
    CkAssert(tmp_buf);

    /* Do Phase 1 of the algorithm. Shift the data blocks on process i
     * upwards by a distance of i blocks. Store the result in recvbuf. */
    MPICH_Localcopy((char *) sendbuf + rank*sendcount*sendtype_extent,
                    (comm_size - rank)*sendcount, sendtype,
                    recvbuf,
                    (comm_size - rank)*recvcount, recvtype);
    MPICH_Localcopy(sendbuf, rank*sendcount, sendtype,
                    (char *) recvbuf + (comm_size-rank)*recvcount*recvtype_extent,
                    rank*recvcount, recvtype);
    /* Input data is now stored in recvbuf with datatype recvtype */

    /* Now do Phase 2, the communication phase. It takes
       ceiling(lg p) steps. In each step i, each process sends to rank+2^i
       and receives from rank-2^i, and exchanges all data blocks
       whose ith bit is 1. */

    /* allocate displacements array for indexed datatype used in
       communication */
    displs = (int*)malloc(comm_size * sizeof(int));
    CkAssert(displs);

    pof2 = 1;
    while (pof2 < comm_size) {
        dst = (rank + pof2) % comm_size;
        src = (rank - pof2 + comm_size) % comm_size;

        /* Exchange all data blocks whose ith bit is 1 */
        /* Create an indexed datatype for the purpose */
        count = 0;
        for (block=1; block<comm_size; block++) {
            if (block & pof2) {
                displs[count] = block * recvcount;
                count++;
            }
        }

        mpi_errno = MPI_Type_create_indexed_block(count, recvcount, displs, recvtype, &newtype);
        if (mpi_errno) return mpi_errno;

        mpi_errno = MPI_Type_commit(&newtype);
        if (mpi_errno) return mpi_errno;

        position = 0;
        mpi_errno = MPI_Pack(recvbuf, 1, newtype, tmp_buf, pack_size, &position, comm);

        mpi_errno = AMPI_Sendrecv(tmp_buf, position, MPI_PACKED, dst, MPI_ATA_TAG,
                                  recvbuf, 1, newtype, src, MPI_ATA_TAG,
                                  comm, MPI_STATUS_IGNORE);
        if (mpi_errno) return mpi_errno;

        mpi_errno = MPI_Type_free(&newtype);
        if (mpi_errno) return mpi_errno;

        pof2 *= 2;
    }

    free(displs);
    free(tmp_buf);

    /* Rotate blocks in recvbuf upwards by (rank + 1) blocks. Need
     * a temporary buffer of the same size as recvbuf. */

    /* get true extent of recvtype */
    mpi_errno = MPI_Type_get_true_extent(recvtype, &recvtype_true_lb, &recvtype_true_extent);
    if (mpi_errno) return mpi_errno;

    recvbuf_extent = recvcount * comm_size * (MAX(recvtype_true_extent, recvtype_extent));
    tmp_buf = malloc(recvbuf_extent);
    CkAssert(tmp_buf);
    /* adjust for potential negative lower bound in datatype */
    tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);

    MPICH_Localcopy((char *) recvbuf + (rank+1)*recvcount*recvtype_extent,
                    (comm_size - rank - 1)*recvcount, recvtype, tmp_buf,
                    (comm_size - rank - 1)*recvcount, recvtype);
    MPICH_Localcopy(recvbuf, (rank+1)*recvcount, recvtype,
                    (char *) tmp_buf + (comm_size-rank-1)*recvcount*recvtype_extent,
                    (rank+1)*recvcount, recvtype);

    /* Blocks are in the reverse order now (comm_size-1 to 0).
     * Reorder them to (0 to comm_size-1) and store them in recvbuf. */
    for (i=0; i<comm_size; i++)
        MPICH_Localcopy((char *) tmp_buf + i*recvcount*recvtype_extent,
                        recvcount, recvtype,
                        (char *) recvbuf + (comm_size-i-1)*recvcount*recvtype_extent,
                        recvcount, recvtype);

    /* undo the lower-bound adjustment before freeing */
    free((char*)tmp_buf + recvtype_true_lb);

    return MPI_SUCCESS;
}
int DTCMP_Sortz(
  const void* inbuf,
  int count,
  void** outbuf,
  int* outcount,
  MPI_Datatype key,
  MPI_Datatype keysat,
  DTCMP_Op cmp,
  DTCMP_Flags hints,
  MPI_Comm comm,
  DTCMP_Handle* handle)
{
  /* check that we're initialized */
  if (dtcmp_init_count <= 0) {
    return DTCMP_FAILURE;
  }

  /* check parameters */
  if (count < 0) {
    return DTCMP_FAILURE;
  }
  if (outbuf == NULL || outcount == NULL) {
    return DTCMP_FAILURE;
  }
  if (! dtcmp_type_is_valid(key)) {
    return DTCMP_FAILURE;
  }
  if (! dtcmp_type_is_valid(keysat)) {
    return DTCMP_FAILURE;
  }

  /* execute allreduce to compute min/max counts per process,
   * and sum of all elements */
  uint64_t min, max, sum;
  dtcmp_get_uint64t_min_max_sum(count, &min, &max, &sum, comm);

  /* nothing to do if the total element count is 0 */
  if (sum == 0) {
    dtcmp_handle_alloc_single(0, outbuf, handle);
    *outcount = 0;
    return DTCMP_SUCCESS;
  }

  /* can't call sample sort with a single process right now */
  int ranks;
  MPI_Comm_size(comm, &ranks);

  /* for now, we can only use sample sort if min == max and ranks > 1 */
  if (min == max && ranks > 1) {
    /* TODO: if number of elements per process is small,
     * call bitonic sort */
    return DTCMP_Sortz_samplesort(
      inbuf, count, outbuf, outcount,
      key, keysat, cmp, hints, comm, handle
    );
  }

  /* otherwise, force things into a Sortv but where we allocate and
   * return memory with a handle */
  MPI_Aint keysat_true_lb, keysat_true_extent;
  MPI_Type_get_true_extent(keysat, &keysat_true_lb, &keysat_true_extent);
  size_t outbuf_size = count * keysat_true_extent;
  dtcmp_handle_alloc_single(outbuf_size, outbuf, handle);
  DTCMP_Sortv(inbuf, *outbuf, count, key, keysat, cmp, hints, comm);
  *outcount = count;

  return DTCMP_SUCCESS;
}
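/* A hedged usage sketch for DTCMP_Sortz (not from the original source),
 * assuming the usual DTCMP entry points (DTCMP_Init/DTCMP_Finalize,
 * DTCMP_OP_INT_ASCEND, DTCMP_FLAG_NONE, DTCMP_Free) are available: each
 * rank contributes two int keys, and Sortz hands back a sorted,
 * handle-owned output buffer whose size may differ per rank. */
#include <mpi.h>
#include <stdio.h>
#include "dtcmp.h"

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    DTCMP_Init();

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* two keys per rank; equal counts allow the sample sort path */
    int mine[2] = { 100 - rank, rank };
    void* out;
    int outcount;
    DTCMP_Handle handle;
    DTCMP_Sortz(mine, 2, &out, &outcount, MPI_INT, MPI_INT,
                DTCMP_OP_INT_ASCEND, DTCMP_FLAG_NONE, MPI_COMM_WORLD, &handle);

    int* sorted = (int*) out;
    for (int i = 0; i < outcount; i++) {
        printf("rank %d: %d\n", rank, sorted[i]);
    }

    /* release the output buffer associated with the handle */
    DTCMP_Free(&handle);

    DTCMP_Finalize();
    MPI_Finalize();
    return 0;
}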