void DO_OP_LOOP(int dst, int iter) { int i, x; switch (OP_TYPE) { case OP_ACC: for (x = 0; x < iter; x++) { for (i = 0; i < NOP; i++) MPI_Accumulate(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, MPI_SUM, win); MPI_Win_flush(dst, win); } break; case OP_PUT: for (x = 0; x < iter; x++) { for (i = 0; i < NOP; i++) MPI_Put(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win); MPI_Win_flush(dst, win); } break; case OP_GET: for (x = 0; x < iter; x++) { for (i = 0; i < NOP; i++) MPI_Get(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win); MPI_Win_flush(dst, win); } break; } }
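/*
 * Illustrative sketch (assumed, not from the original benchmark): MPI_Win_flush
 * is only defined inside a passive-target access epoch, so DO_OP_LOOP above is
 * assumed to run between a lock and an unlock on the same window.  The driver
 * name below is hypothetical; `win` and the other globals are those used by
 * DO_OP_LOOP.
 */
void run_op_loop(int dst, int iter)
{
    MPI_Win_lock(MPI_LOCK_SHARED, dst, 0, win);  /* open passive-target epoch to dst */
    DO_OP_LOOP(dst, iter);                       /* NOP operations per MPI_Win_flush */
    MPI_Win_unlock(dst, win);                    /* close epoch; completes everything */
}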
int oshmpi_trylock(long * lockp) { int is_locked = -1, nil = -1; oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp; lock->prev = -1; /* Get the last tail, if -1 replace with me */ MPI_Compare_and_swap (&shmem_world_rank, &nil, &(lock->prev), MPI_INT, TAIL, TAIL_DISP, oshmpi_lock_win); MPI_Win_flush (TAIL, oshmpi_lock_win); /* Find if the last proc is holding lock */ if (lock->prev != -1) { MPI_Fetch_and_op (NULL, &is_locked, MPI_INT, lock->prev, LOCK_DISP, MPI_NO_OP, oshmpi_lock_win); MPI_Win_flush (lock->prev, oshmpi_lock_win); if (is_locked) return 0; } /* Add myself in tail */ MPI_Fetch_and_op (&shmem_world_rank, &(lock->prev), MPI_INT, TAIL, TAIL_DISP, MPI_REPLACE, oshmpi_lock_win); MPI_Win_flush (TAIL, oshmpi_lock_win); /* Hold lock */ oshmpi_lock_base[LOCK_DISP] = 1; MPI_Win_sync (oshmpi_lock_win); return 1; }
dart_ret_t dart_flush( dart_gptr_t gptr) { MPI_Win win; dart_unit_t target_unitid_abs; int16_t seg_id = gptr.segid; target_unitid_abs = gptr.unitid; DART_LOG_DEBUG("dart_flush() gptr: " "unitid:%d offset:%"PRIu64" segid:%d index:%d", gptr.unitid, gptr.addr_or_offs.offset, gptr.segid, gptr.flags); if (seg_id) { dart_unit_t target_unitid_rel; uint16_t index = gptr.flags; win = dart_win_lists[index]; unit_g2l(index, target_unitid_abs, &target_unitid_rel); DART_LOG_TRACE("dart_flush: MPI_Win_flush"); MPI_Win_flush(target_unitid_rel, win); } else { win = dart_win_local_alloc; DART_LOG_TRACE("dart_flush: MPI_Win_flush"); MPI_Win_flush(target_unitid_abs, win); } DART_LOG_DEBUG("dart_flush > finished"); return DART_OK; }
/** Lock a mutex. * * @param[in] hdl Handle to the mutex * @return MPI status */ int MCS_Mutex_lock(MCS_Mutex hdl) { int prev; /* This store is safe, since it cannot happen concurrently with a remote * write */ hdl->base[MCS_MTX_ELEM_DISP] = -1; MPI_Win_sync(hdl->window); MPI_Fetch_and_op(&shmem_world_rank, &prev, MPI_INT, hdl->tail_rank, MCS_MTX_TAIL_DISP, MPI_REPLACE, hdl->window); MPI_Win_flush(hdl->tail_rank, hdl->window); /* If there was a previous tail, update their next pointer and wait for * notification. Otherwise, the mutex was successfully acquired. */ if (prev != -1) { /* Wait for notification */ MPI_Status status; MPI_Accumulate(&shmem_world_rank, 1, MPI_INT, prev, MCS_MTX_ELEM_DISP, 1, MPI_INT, MPI_REPLACE, hdl->window); MPI_Win_flush(prev, hdl->window); debug_print("%2d: LOCK - waiting for notification from %d\n", shmem_world_rank, prev); MPI_Recv(NULL, 0, MPI_BYTE, prev, MCS_MUTEX_TAG, hdl->comm, &status); } debug_print("%2d: LOCK - lock acquired\n", shmem_world_rank); return MPI_SUCCESS; }
dart_ret_t dart_lock_acquire (dart_lock_t lock)
{
  dart_unit_t unitid;
  dart_team_myid (lock -> teamid, &unitid);
  if (lock -> is_acquired == 1) {
    printf ("Warning: LOCK - %2d has acquired the lock already\n", unitid);
    return DART_OK;
  }
  dart_gptr_t gptr_tail;
  dart_gptr_t gptr_list;
  int32_t predecessor[1], result[1];
  MPI_Win win;
  MPI_Status status;
  DART_GPTR_COPY(gptr_tail, lock -> gptr_tail);
  DART_GPTR_COPY(gptr_list, lock -> gptr_list);
  uint64_t offset_tail = gptr_tail.addr_or_offs.offset;
  int16_t seg_id = gptr_list.segid;
  dart_unit_t tail = gptr_tail.unitid;
  uint16_t index = gptr_list.flags;
  MPI_Aint disp_list;

  /* MPI-3 atomic operation: atomically install myself as the new tail */
  MPI_Fetch_and_op (&unitid, predecessor, MPI_INT32_T, tail, offset_tail,
                    MPI_REPLACE, dart_win_local_alloc);
  MPI_Win_flush (tail, dart_win_local_alloc);

  /* If there was a previous tail (predecessor), update the predecessor's
   * next pointer with unitid and wait for its notification. */
  if (*predecessor != -1) {
    if (dart_adapt_transtable_get_disp (seg_id, *predecessor, &disp_list) == -1) {
      return DART_ERR_INVAL;
    }
    win = dart_win_lists[index];
    /* Atomicity: update the predecessor's next pointer */
    MPI_Fetch_and_op (&unitid, result, MPI_INT32_T, *predecessor, disp_list,
                      MPI_REPLACE, win);
    MPI_Win_flush (*predecessor, win);
    /* Wait for notification from the predecessor */
    DART_LOG_DEBUG ("%2d: LOCK - waiting for notification from %d in team %d",
                    unitid, *predecessor, (lock -> teamid));
    MPI_Recv (NULL, 0, MPI_INT, *predecessor, 0, dart_teams[index], &status);
  }
  DART_LOG_DEBUG ("%2d: LOCK - lock acquired in team %d", unitid, (lock -> teamid));
  lock -> is_acquired = 1;
  return DART_OK;
}
int main(int argc, char **argv){ int i, me, target; unsigned int size; double t, t_max; MPI_Win win; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Win_create(&send_buf, sizeof(char)*MAX_SIZE, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); target = 1 - me; MPI_Win_lock_all(0, win); init_buf(send_buf, me); if(me==0) print_items(); for(size=1;size<MAX_SIZE+1;size*=2){ MPI_Barrier(MPI_COMM_WORLD); for(i=0;i<LOOP+WARMUP;i++){ if(WARMUP == i) t = wtime(); if(me == 0){ MPI_Put(send_buf, size, MPI_CHAR, target, 0, size, MPI_CHAR, win); MPI_Win_flush_local(target, win); while(send_buf[0] == '0' || send_buf[size-1] == '0'){ MPI_Win_flush(me, win); } send_buf[0] = '0'; send_buf[size-1] = '0'; } else { while(send_buf[0] == '1' || send_buf[size-1] == '1'){ MPI_Win_flush(me, win); } send_buf[0] = '1'; send_buf[size-1] = '1'; MPI_Put(send_buf, size, MPI_CHAR, target, 0, size, MPI_CHAR, win); MPI_Win_flush_local(target, win); } } //end of LOOP t = wtime() - t; MPI_Reduce(&t, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if(me == 0) print_results(size, t_max); } MPI_Win_unlock_all(win); MPI_Win_free(&win); MPI_Finalize(); return 0; }
/** Unlock a mutex. * * @param[in] hdl Handle to the mutex * @return MPI status */ int MCS_Mutex_unlock(MCS_Mutex hdl) { int next; /* Read my next pointer. FOP is used since another process may write to * this location concurrent with this read. */ MPI_Fetch_and_op(NULL, &next, MPI_INT, shmem_world_rank, MCS_MTX_ELEM_DISP, MPI_NO_OP, hdl->window); MPI_Win_flush(shmem_world_rank, hdl->window); if ( next == -1) { int tail; int nil = -1; /* Check if we are the at the tail of the lock queue. If so, we're * done. If not, we need to send notification. */ MPI_Compare_and_swap(&nil, &shmem_world_rank, &tail, MPI_INT, hdl->tail_rank, MCS_MTX_TAIL_DISP, hdl->window); MPI_Win_flush(hdl->tail_rank, hdl->window); if (tail != shmem_world_rank) { debug_print("%2d: UNLOCK - waiting for next pointer (tail = %d)\n", shmem_world_rank, tail); assert(tail >= 0 && tail < shmem_world_size); for (;;) { int flag; MPI_Fetch_and_op(NULL, &next, MPI_INT, shmem_world_rank, MCS_MTX_ELEM_DISP, MPI_NO_OP, hdl->window); MPI_Win_flush(shmem_world_rank, hdl->window); if (next != -1) break; /* Is this here just to poke progress? If yes, then that is lame. */ MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); } } } /* Notify the next waiting process */ if (next != -1) { debug_print("%2d: UNLOCK - notifying %d\n", shmem_world_rank, next); MPI_Send(NULL, 0, MPI_BYTE, next, MCS_MUTEX_TAG, hdl->comm); } debug_print("%2d: UNLOCK - lock released\n", shmem_world_rank); return MPI_SUCCESS; }
/*Run FOP with flush */ void run_fop_with_flush (int rank, WINDOW type) { int i; MPI_Aint disp = 0; MPI_Win win; MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); allocate_atomic_memory(rank, sbuf_original, rbuf_original, tbuf_original, NULL, (char **)&sbuf, (char **)&rbuf, (char **)&tbuf, NULL, (char **)&rbuf, MAX_MSG_SIZE, type, &win); if(rank == 0) { if (type == WIN_DYNAMIC) { disp = disp_remote; } MPI_CHECK(MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win)); for (i = 0; i < skip + loop; i++) { if (i == skip) { t_start = MPI_Wtime (); } MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win)); MPI_CHECK(MPI_Win_flush(1, win)); } t_end = MPI_Wtime (); MPI_CHECK(MPI_Win_unlock(1, win)); } MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); print_latency(rank, 8); free_atomic_memory (sbuf, rbuf, tbuf, NULL, win, rank); }
double message_rate (long * buffer, int size, int iterations, int me, int pairs, int nxtpe, MPI_Win win) { int64_t begin, end; int i, offset; /* * Touch memory */ memset(buffer, size, MAX_MSG_SZ * ITERS_LARGE * sizeof(long)); MPI_Barrier(MPI_COMM_WORLD); if (me < pairs) { begin = TIME(); for (i = 0, offset = 0; i < iterations; i++, offset++) { MPI_Put ((buffer + offset*size), size, MPI_LONG, nxtpe, offset*size, size, MPI_LONG, win); //MPI_Win_flush_local (nxtpe, win); } //MPI_Win_flush_all(win); MPI_Win_flush(nxtpe, win); end = TIME(); return ((double)iterations * 1e6) / ((double)end - (double)begin); } return 0; }
static inline void _wait_puts(const int target_rank, const MPI_Win win) { if(_is_put_blocking){ XACC_DEBUG("flush(%d) for [host|acc]", target_rank); MPI_Win_flush(target_rank, win); }else if(_is_put_local_blocking){ XACC_DEBUG("flush_local(%d) for [host|acc]", target_rank); MPI_Win_flush_local(target_rank, win); } }
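/*
 * Illustrative caller (assumed, not from the original source): _wait_puts is
 * meant to follow the put it completes.  The helper name and the byte-wise
 * transfer below are placeholders.
 */
static void put_and_complete(const void *src, int nbytes, int target_rank,
                             MPI_Aint target_disp, MPI_Win win)
{
    MPI_Put(src, nbytes, MPI_BYTE, target_rank, target_disp, nbytes, MPI_BYTE, win);
    _wait_puts(target_rank, win);   /* MPI_Win_flush or MPI_Win_flush_local, per config */
}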
MTEST_THREAD_RETURN_TYPE run_test(void *arg) { int i; for (i = 0; i < LOOPS; i++) { /* send a global variable, rather than a stack variable, so * other threads can access the address during flush */ MPI_Put(&dummy, 1, MPI_INT, 0, 0, 1, MPI_INT, win); MPI_Win_flush(0, win); } return (MTEST_THREAD_RETURN_TYPE) NULL; }
void shmemx_ct_set(shmemx_ct_t ct, long value) { #ifdef ENABLE_SMP_OPTIMIZATIONS if (shmem_world_is_smp) { __sync_lock_test_and_set(ct,value); } else #endif { shmem_offset_t win_offset = (ptrdiff_t)((intptr_t)ct - (intptr_t)shmem_sheap_base_ptr); MPI_Fetch_and_op(&value, NULL, MPI_LONG, shmem_world_rank, win_offset, MPI_REPLACE, shmem_sheap_win); MPI_Win_flush(shmem_world_rank, shmem_sheap_win); } return; }
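/*
 * Sketch of the complementary read (assumed; it mirrors shmemx_ct_set above
 * rather than quoting the library): an atomic load is expressed as
 * MPI_Fetch_and_op with MPI_NO_OP, followed by a flush so the fetched value
 * is usable.
 */
long shmemx_ct_get(shmemx_ct_t ct)
{
    long value;
#ifdef ENABLE_SMP_OPTIMIZATIONS
    if (shmem_world_is_smp) {
        value = __sync_fetch_and_add(ct, 0);   /* atomic read on shared memory */
    } else
#endif
    {
        shmem_offset_t win_offset = (ptrdiff_t)((intptr_t)ct - (intptr_t)shmem_sheap_base_ptr);
        MPI_Fetch_and_op(NULL, &value, MPI_LONG, shmem_world_rank,
                         win_offset, MPI_NO_OP, shmem_sheap_win);
        MPI_Win_flush(shmem_world_rank, shmem_sheap_win);
    }
    return value;
}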
void oshmpi_lock(long * lockp) { MPI_Status status; oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp; /* Replace myself with the last tail */ MPI_Fetch_and_op (&shmem_world_rank, &(lock->prev), MPI_INT, TAIL, TAIL_DISP, MPI_REPLACE, oshmpi_lock_win); MPI_Win_flush (TAIL, oshmpi_lock_win); /* Previous proc holding lock will eventually notify */ if (lock->prev != -1) { /* Send my shmem_world_rank to previous proc's next */ MPI_Accumulate (&shmem_world_rank, 1, MPI_INT, lock->prev, NEXT_DISP, 1, MPI_INT, MPI_REPLACE, oshmpi_lock_win); MPI_Win_flush (lock->prev, oshmpi_lock_win); MPI_Probe (lock->prev, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } /* Hold lock */ oshmpi_lock_base[LOCK_DISP] = 1; MPI_Win_sync (oshmpi_lock_win); return; }
void oshmpi_unlock(long * lockp) { oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp; /* Determine my next process */ MPI_Fetch_and_op (NULL, &(lock->next), MPI_INT, shmem_world_rank, NEXT_DISP, MPI_NO_OP, oshmpi_lock_win); MPI_Win_flush (shmem_world_rank, oshmpi_lock_win); if (lock->next != -1) { MPI_Send (&shmem_world_rank, 1, MPI_INT, lock->next, 999, MPI_COMM_WORLD); } /* Release lock */ oshmpi_lock_base[LOCK_DISP] = -1; MPI_Win_sync (oshmpi_lock_win); return; }
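/*
 * Illustrative critical section (assumed, not from the library): oshmpi_lock /
 * oshmpi_unlock bracket a non-atomic remote read-modify-write.  data_win, disp
 * and the surrounding passive-target epoch (e.g. MPI_Win_lock_all) are
 * placeholders; lock_var follows the OpenSHMEM convention of a symmetric long
 * initialized to 0.
 */
static long lock_var = 0;

void locked_remote_increment(int target, MPI_Aint disp, MPI_Win data_win)
{
    long val;

    oshmpi_lock(&lock_var);                               /* enter critical section */
    MPI_Get(&val, 1, MPI_LONG, target, disp, 1, MPI_LONG, data_win);
    MPI_Win_flush(target, data_win);                      /* val now holds the remote value */
    val += 1;
    MPI_Put(&val, 1, MPI_LONG, target, disp, 1, MPI_LONG, data_win);
    MPI_Win_flush(target, data_win);                      /* update completed before unlock */
    oshmpi_unlock(&lock_var);                             /* leave critical section */
}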
dart_ret_t dart_wait( dart_handle_t handle) { int mpi_ret; DART_LOG_DEBUG("dart_wait() handle:%p", (void*)(handle)); if (handle != NULL) { DART_LOG_TRACE("dart_wait_local: handle->dest: %d", handle->dest); DART_LOG_TRACE("dart_wait_local: handle->win: %"PRIu64"", (uint64_t)handle->win); DART_LOG_TRACE("dart_wait_local: handle->req: %d", handle->request); if (handle->request != MPI_REQUEST_NULL) { MPI_Status mpi_sta; DART_LOG_DEBUG("dart_wait: -- MPI_Wait"); mpi_ret = MPI_Wait(&(handle->request), &mpi_sta); DART_LOG_TRACE("dart_wait: -- mpi_sta.MPI_SOURCE: %d", mpi_sta.MPI_SOURCE); DART_LOG_TRACE("dart_wait: -- mpi_sta.MPI_ERROR: %d:%s", mpi_sta.MPI_ERROR, DART__MPI__ERROR_STR(mpi_sta.MPI_ERROR)); if (mpi_ret != MPI_SUCCESS) { DART_LOG_DEBUG("dart_wait ! MPI_Wait failed"); return DART_ERR_INVAL; } DART_LOG_DEBUG("dart_wait: -- MPI_Win_flush"); mpi_ret = MPI_Win_flush(handle->dest, handle->win); if (mpi_ret != MPI_SUCCESS) { DART_LOG_DEBUG("dart_wait ! MPI_Win_flush failed"); return DART_ERR_INVAL; } } else { DART_LOG_TRACE("dart_wait: handle->request: MPI_REQUEST_NULL"); } /* Free handle resource */ DART_LOG_DEBUG("dart_wait: free handle %p", (void*)(handle)); free(handle); handle = NULL; } DART_LOG_DEBUG("dart_wait > finished"); return DART_OK; }
/** Attempt to acquire a mutex.
 *
 * @param[in]  hdl     Handle to the mutex
 * @param[out] success Indicates whether the mutex was acquired
 * @return MPI status
 */
int MCS_Mutex_trylock(MCS_Mutex hdl, int *success)
{
    int tail, nil = -1;

    /* This store is safe, since it cannot happen concurrently with a remote
     * write */
    hdl->base[MCS_MTX_ELEM_DISP] = -1;
    MPI_Win_sync(hdl->window);

    /* Check if the lock is available and claim it if it is. */
    MPI_Compare_and_swap(&shmem_world_rank, &nil, &tail, MPI_INT, hdl->tail_rank,
                         MCS_MTX_TAIL_DISP, hdl->window);
    MPI_Win_flush(hdl->tail_rank, hdl->window);

    /* If the old tail was -1, we have claimed the mutex */
    *success = (tail == nil) ? 1 : 0;

    debug_print("%2d: TRYLOCK - %s\n", shmem_world_rank,
                (*success) ? "Success" : "Non-success");

    return MPI_SUCCESS;
}
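/*
 * Illustrative driver tying MCS_Mutex_lock / _trylock / _unlock together
 * (assumed, not from the original source).  MCS_Mutex_create and
 * MCS_Mutex_free are assumed constructor/destructor routines with signatures
 * suggested by the handle fields used above.
 */
void mcs_mutex_example(MPI_Comm comm)
{
    MCS_Mutex mtx;
    int acquired = 0;

    MCS_Mutex_create(0 /* tail rank */, comm, &mtx);   /* assumed constructor */

    MCS_Mutex_trylock(mtx, &acquired);                 /* try once without blocking */
    if (!acquired)
        MCS_Mutex_lock(mtx);                           /* fall back to blocking lock */

    /* ... critical section protected by the mutex ... */

    MCS_Mutex_unlock(mtx);
    MCS_Mutex_free(&mtx);                              /* assumed destructor */
}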
/*Run Get_accumulate with flush */ void run_get_acc_with_flush(int rank, WINDOW type) { int size, i; MPI_Aint disp = 0; MPI_Win win; for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : size + 1)) { allocate_memory(rank, rbuf, size, type, &win); if (type == WIN_DYNAMIC) { disp = sdisp_remote; } if(size > LARGE_MESSAGE_SIZE) { loop = LOOP_LARGE; skip = SKIP_LARGE; } if(rank == 0) { MPI_CHECK(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win)); for (i = 0; i < skip + loop; i++) { if (i == skip) { t_start = MPI_Wtime (); } MPI_CHECK(MPI_Get_accumulate(sbuf, size, MPI_CHAR, cbuf, size, MPI_CHAR, 1, disp, size, MPI_CHAR, MPI_SUM, win)); MPI_CHECK(MPI_Win_flush(1, win)); } t_end = MPI_Wtime (); MPI_CHECK(MPI_Win_unlock(1, win)); } MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); print_latency(rank, size); MPI_Win_free(&win); } }
dart_ret_t dart_lock_try_acquire (dart_lock_t lock, int32_t *is_acquired) { dart_unit_t unitid; dart_team_myid (lock -> teamid, &unitid); if (lock -> is_acquired == 1) { printf ("Warning: TRYLOCK - %2d has acquired the lock already\n", unitid); return DART_OK; } dart_gptr_t gptr_tail; int32_t result[1]; int32_t compare[1] = {-1}; DART_GPTR_COPY(gptr_tail, lock -> gptr_tail); dart_unit_t tail = gptr_tail.unitid; uint64_t offset = gptr_tail.addr_or_offs.offset; /* Atomicity: Check if the lock is available and claim it if it is. */ MPI_Compare_and_swap (&unitid, compare, result, MPI_INT32_T, tail, offset, dart_win_local_alloc); MPI_Win_flush (tail, dart_win_local_alloc); /* If the old predecessor was -1, we will claim the lock, otherwise, do nothing. */ if (*result == -1) { lock -> is_acquired = 1; *is_acquired = 1; } else { *is_acquired = 0; } DART_LOG_DEBUG("dart_lock_try_acquire: trylock %s in team %d", ((*is_acquired) ? "succeeded" : "failed"), (lock -> teamid)); return DART_OK; }
/* garray_put() */ int64_t garray_put(garray_t *ga, int64_t *lo, int64_t *hi, void *buf_) { int64_t count = (hi[0] - lo[0]) + 1, length = count * ga->elem_size, tlonid, tloidx, thinid, thiidx, tnid, tidx, n, oidx = 0; int8_t *buf = (int8_t *)buf_; calc_target(ga, lo[0], &tlonid, &tloidx); calc_target(ga, hi[0], &thinid, &thiidx); /* is all data going to the same target? */ if (tlonid == thinid) { LOG_DEBUG(ga->g->glog, "[%d] garray put %ld-%ld, single target %ld.%ld\n", ga->g->nid, lo[0], hi[0], tlonid, tloidx); //MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tlonid, 0, ga->win); MPI_Put(buf, length, MPI_INT8_T, tlonid, (tloidx * ga->elem_size), length, MPI_INT8_T, ga->win); //MPI_Win_unlock(tlonid, ga->win); MPI_Win_flush(tlonid, ga->win); return 0; } /* put the data into the lo nid */ n = ga->nelems_per_node + (tlonid < ga->nextra_elems ? 1 : 0) - tloidx; LOG_DEBUG(ga->g->glog, "[%d] garray putting %ld elements into %ld.%ld\n", ga->g->nid, n, tlonid, tloidx); //MPI_Win_lock(MPI_LOCK_SHARED, tlonid, 0, ga->win); MPI_Put(buf, length, MPI_INT8_T, tlonid, (tloidx * ga->elem_size), (n * ga->elem_size), MPI_INT8_T, ga->win); //MPI_Win_unlock(tlonid, ga->win); oidx = (n*ga->elem_size); /* put the data into the in-between nids */ tidx = 0; for (tnid = tlonid + 1; tnid < thinid; ++tnid) { n = ga->nelems_per_node + (tnid < ga->nextra_elems ? 1 : 0); LOG_DEBUG(ga->g->glog, "[%d] garray putting %ld elements into %ld.%ld\n", ga->g->nid, n, tnid, tidx); //MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tnid, 0, ga->win); MPI_Put(&buf[oidx], (n * ga->elem_size), MPI_INT8_T, tnid, 0, (n * ga->elem_size), MPI_INT8_T, ga->win); //MPI_Win_unlock(tnid, ga->win); oidx += (n*ga->elem_size); } /* put the data into the hi nid */ n = thiidx + 1; LOG_DEBUG(ga->g->glog, "[%d] garray putting %ld elements up to %ld.%ld\n", ga->g->nid, n, thinid, thiidx); //MPI_Win_lock(MPI_LOCK_EXCLUSIVE, thinid, 0, ga->win); MPI_Put(&buf[oidx], (n * ga->elem_size), MPI_INT8_T, thinid, 0, (n * ga->elem_size), MPI_INT8_T, ga->win); //MPI_Win_unlock(thinid, ga->win); MPI_Win_flush_all(ga->win); return 0; }
int main(int argc, char *argv[]) { int rank, nproc, i, x; int errors = 0, all_errors = 0; MPI_Win win = MPI_WIN_NULL; MPI_Comm shm_comm = MPI_COMM_NULL; int shm_nproc, shm_rank; double **shm_bases = NULL, *my_base; MPI_Win shm_win = MPI_WIN_NULL; MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL; int *shm_ranks = NULL, *shm_ranks_in_world = NULL; MPI_Aint get_target_base_offsets = 0; int win_size = sizeof(double) * BUF_CNT; int new_win_size = win_size; int win_unit = sizeof(double); int shm_root_rank_in_world; int origin = -1, put_target, get_target; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_group(MPI_COMM_WORLD, &world_group); if (nproc != 4) { if (rank == 0) printf("Error: must be run with four processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm); MPI_Comm_rank(shm_comm, &shm_rank); MPI_Comm_size(shm_comm, &shm_nproc); MPI_Comm_group(shm_comm, &shm_group); /* Platform does not support shared memory or wrong host file, just return. */ if (shm_nproc != 2) { goto exit; } shm_bases = (double **) calloc(shm_nproc, sizeof(double *)); shm_ranks = (int *) calloc(shm_nproc, sizeof(int)); shm_ranks_in_world = (int *) calloc(shm_nproc, sizeof(int)); if (shm_rank == 0) shm_root_rank_in_world = rank; MPI_Bcast(&shm_root_rank_in_world, 1, MPI_INT, 0, shm_comm); /* Identify ranks of target processes which are located on node 0 */ if (rank == 0) { for (i = 0; i < shm_nproc; i++) { shm_ranks[i] = i; } MPI_Group_translate_ranks(shm_group, shm_nproc, shm_ranks, world_group, shm_ranks_in_world); } MPI_Bcast(shm_ranks_in_world, shm_nproc, MPI_INT, 0, MPI_COMM_WORLD); put_target = shm_ranks_in_world[shm_nproc - 1]; get_target = shm_ranks_in_world[0]; /* Identify the rank of origin process which are located on node 1 */ if (shm_root_rank_in_world == 1 && shm_rank == 0) { origin = rank; if (verbose) { printf("---- I am origin = %d, get_target = %d, put_target = %d\n", origin, get_target, put_target); } } /* Allocate shared memory among local processes */ MPI_Win_allocate_shared(win_size, win_unit, MPI_INFO_NULL, shm_comm, &my_base, &shm_win); if (shm_root_rank_in_world == 0 && verbose) { MPI_Aint size; int disp_unit; for (i = 0; i < shm_nproc; i++) { MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &shm_bases[i]); printf("%d -- shared query: base[%d]=%p, size %zd, " "unit %d\n", rank, i, shm_bases[i], size, disp_unit); } } /* Get offset of put target(1) on get target(0) */ get_target_base_offsets = (shm_nproc - 1) * win_size / win_unit; if (origin == rank && verbose) printf("%d -- base_offset of put_target %d on get_target %d: %zd\n", rank, put_target, get_target, get_target_base_offsets); /* Create using MPI_Win_create(). Note that new window size of get_target(0) * is equal to the total size of shm segments on this node, thus get_target * process can read the byte located on put_target process.*/ for (i = 0; i < BUF_CNT; i++) { local_buf[i] = (i + 1) * 1.0; my_base[i] = 0.0; } if (get_target == rank) new_win_size = win_size * shm_nproc; MPI_Win_create(my_base, new_win_size, win_unit, MPI_INFO_NULL, MPI_COMM_WORLD, &win); if (verbose) printf("%d -- new window my_base %p, size %d\n", rank, my_base, new_win_size); MPI_Barrier(MPI_COMM_WORLD); /* Check if flush guarantees the completion of put operations on target side. * * P exclusively locks 2 processes whose windows are shared with each other. 
* P first put and flush to a process, then get the updated data from another process. * If flush returns before operations are done on the target side, the data may be * incorrect.*/ for (x = 0; x < ITER; x++) { for (i = 0; i < BUF_CNT; i++) { local_buf[i] += x; check_buf[i] = 0; } if (rank == origin) { MPI_Win_lock(MPI_LOCK_EXCLUSIVE, put_target, 0, win); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, get_target, 0, win); for (i = 0; i < BUF_CNT; i++) { MPI_Put(&local_buf[i], 1, MPI_DOUBLE, put_target, i, 1, MPI_DOUBLE, win); } MPI_Win_flush(put_target, win); MPI_Get(check_buf, BUF_CNT, MPI_DOUBLE, get_target, get_target_base_offsets, BUF_CNT, MPI_DOUBLE, win); MPI_Win_flush(get_target, win); for (i = 0; i < BUF_CNT; i++) { if (check_buf[i] != local_buf[i]) { printf("%d(iter %d) - Got check_buf[%d] = %.1lf, expected %.1lf\n", rank, x, i, check_buf[i], local_buf[i]); errors++; } } MPI_Win_unlock(put_target, win); MPI_Win_unlock(get_target, win); } } MPI_Barrier(MPI_COMM_WORLD); MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); exit: if (rank == 0 && all_errors == 0) printf(" No Errors\n"); if (shm_bases) free(shm_bases); if (shm_ranks) free(shm_ranks); if (shm_ranks_in_world) free(shm_ranks_in_world); if (shm_win != MPI_WIN_NULL) MPI_Win_free(&shm_win); if (win != MPI_WIN_NULL) MPI_Win_free(&win); if (shm_comm != MPI_COMM_NULL) MPI_Comm_free(&shm_comm); if (shm_group != MPI_GROUP_NULL) MPI_Group_free(&shm_group); if (world_group != MPI_GROUP_NULL) MPI_Group_free(&world_group); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int rank, nproc, i; int errors = 0, all_errors = 0; int *buf = NULL, *winbuf = NULL; MPI_Win window; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (nproc < 2) { if (rank == 0) printf("Error: must be run with two or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Alloc_mem(MAX_SIZE * sizeof(int), MPI_INFO_NULL, &buf); MPI_Alloc_mem(MAX_SIZE * sizeof(int), MPI_INFO_NULL, &winbuf); MPI_Win_create(winbuf, MAX_SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &window); MPI_Win_lock_all(0, window); /* Test Raccumulate local completion with small data. * Small data is always copied to header packet as immediate data. */ if (rank == 1) { for (i = 0; i < ITER; i++) { MPI_Request acc_req; int val = -1; buf[0] = rank * i; MPI_Raccumulate(&buf[0], 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_MAX, window, &acc_req); MPI_Wait(&acc_req, MPI_STATUS_IGNORE); /* reset local buffer to check local completion */ buf[0] = 0; MPI_Win_flush(0, window); MPI_Get(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window); MPI_Win_flush(0, window); if (val != rank * i) { printf("%d - Got %d in small Raccumulate test, expected %d (%d * %d)\n", rank, val, rank * i, rank, i); errors++; } } } MPI_Barrier(MPI_COMM_WORLD); /* Test Raccumulate local completion with large data . * Large data is not suitable for 1-copy optimization, and always sent out * from user buffer. */ if (rank == 1) { for (i = 0; i < ITER; i++) { MPI_Request acc_req; int val0 = -1, val1 = -1, val2 = -1; int j; /* initialize data */ for (j = 0; j < MAX_SIZE; j++) { buf[j] = rank + j + i; } MPI_Raccumulate(buf, MAX_SIZE, MPI_INT, 0, 0, MAX_SIZE, MPI_INT, MPI_REPLACE, window, &acc_req); MPI_Wait(&acc_req, MPI_STATUS_IGNORE); /* reset local buffer to check local completion */ buf[0] = 0; buf[MAX_SIZE - 1] = 0; buf[MAX_SIZE / 2] = 0; MPI_Win_flush(0, window); /* get remote values which are modified in local buffer after wait */ MPI_Get(&val0, 1, MPI_INT, 0, 0, 1, MPI_INT, window); MPI_Get(&val1, 1, MPI_INT, 0, MAX_SIZE - 1, 1, MPI_INT, window); MPI_Get(&val2, 1, MPI_INT, 0, MAX_SIZE / 2, 1, MPI_INT, window); MPI_Win_flush(0, window); if (val0 != rank + i) { printf("%d - Got %d in large Raccumulate test, expected %d\n", rank, val0, rank + i); errors++; } if (val1 != rank + MAX_SIZE - 1 + i) { printf("%d - Got %d in large Raccumulate test, expected %d\n", rank, val1, rank + MAX_SIZE - 1 + i); errors++; } if (val2 != rank + MAX_SIZE / 2 + i) { printf("%d - Got %d in large Raccumulate test, expected %d\n", rank, val2, rank + MAX_SIZE / 2 + i); errors++; } } } MPI_Win_unlock_all(window); MPI_Barrier(MPI_COMM_WORLD); MPI_Win_free(&window); if (buf) MPI_Free_mem(buf); if (winbuf) MPI_Free_mem(winbuf); MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0 && all_errors == 0) printf(" No Errors\n"); MPI_Finalize(); return 0; }
int main( int argc, char *argv[] ) { int rank, nproc, i; int errors = 0, all_errors = 0; int *buf; MPI_Win window; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (nproc < 2) { if (rank == 0) printf("Error: must be run with two or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /** Create using MPI_Win_create() **/ if (rank == 0) { MPI_Alloc_mem(4*sizeof(int), MPI_INFO_NULL, &buf); *buf = nproc-1; } else buf = NULL; MPI_Win_create(buf, 4*sizeof(int)*(rank == 0), 1, MPI_INFO_NULL, MPI_COMM_WORLD, &window); /* PROC_NULL Communication */ { MPI_Request pn_req[4]; int val[4], res; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, MPI_PROC_NULL, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, MPI_PROC_NULL, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, MPI_PROC_NULL, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, MPI_PROC_NULL, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); } MPI_Barrier(MPI_COMM_WORLD); MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, window); /* GET-ACC: Test third-party communication, through rank 0. */ for (i = 0; i < ITER; i++) { MPI_Request gacc_req; int val = -1, exp = -1; /* Processes form a ring. Process 0 starts first, then passes a token * to the right. Each process, in turn, performs third-party * communication via process 0's window. */ if (rank > 0) { MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Rget_accumulate(&rank, 1, MPI_INT, &val, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &gacc_req); assert(gacc_req != MPI_REQUEST_NULL); MPI_Wait(&gacc_req, MPI_STATUS_IGNORE); MPI_Win_flush(0, window); exp = (rank + nproc-1) % nproc; if (val != exp) { printf("%d - Got %d, expected %d\n", rank, val, exp); errors++; } if (rank < nproc-1) { MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) *buf = nproc-1; MPI_Win_sync(window); /* GET+PUT: Test third-party communication, through rank 0. */ for (i = 0; i < ITER; i++) { MPI_Request req; int val = -1, exp = -1; /* Processes form a ring. Process 0 starts first, then passes a token * to the right. Each process, in turn, performs third-party * communication via process 0's window. */ if (rank > 0) { MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Rput(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Win_flush(0, window); exp = (rank + nproc-1) % nproc; if (val != exp) { printf("%d - Got %d, expected %d\n", rank, val, exp); errors++; } if (rank < nproc-1) { MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) *buf = nproc-1; MPI_Win_sync(window); /* GET+ACC: Test third-party communication, through rank 0. */ for (i = 0; i < ITER; i++) { MPI_Request req; int val = -1, exp = -1; /* Processes form a ring. Process 0 starts first, then passes a token * to the right. 
Each process, in turn, performs third-party * communication via process 0's window. */ if (rank > 0) { MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Raccumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Win_flush(0, window); exp = (rank + nproc-1) % nproc; if (val != exp) { printf("%d - Got %d, expected %d\n", rank, val, exp); errors++; } if (rank < nproc-1) { MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); } MPI_Win_unlock(0, window); MPI_Barrier(MPI_COMM_WORLD); /* Wait inside of an epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); MPI_Win_unlock_all(window); } MPI_Barrier(MPI_COMM_WORLD); /* Wait outside of an epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); } /* Wait in a different epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Win_lock_all(0, window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); MPI_Win_unlock_all(window); } /* Wait in a fence epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); 
assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Win_fence(0, window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); MPI_Win_fence(0, window); } MPI_Win_free(&window); if (buf) MPI_Free_mem(buf); MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0 && all_errors == 0) printf(" No Errors\n"); MPI_Finalize(); return 0; }
int main(int argc, char ** argv) { long Block_order; /* number of columns owned by rank */ long Block_size; /* size of a single block */ long Colblock_size; /* size of column block */ int Tile_order=32; /* default Tile order */ int tiling; /* boolean: true if tiling is used */ int Num_procs; /* number of ranks */ long order; /* order of overall matrix */ int send_to, recv_from; /* ranks with which to communicate */ long bytes; /* combined size of matrices */ int my_ID; /* rank */ int root=0; /* rank of root */ int iterations; /* number of times to do the transpose */ int i, j, it, jt, istart;/* dummies */ int iter; /* index of iteration */ int phase; /* phase inside staged communication */ int colstart; /* starting column for owning rank */ int error; /* error flag */ double RESTRICT *A_p; /* original matrix column block */ double RESTRICT *B_p; /* transposed matrix column block */ double RESTRICT *Work_in_p;/* workspace for transpose function */ double RESTRICT *Work_out_p;/* workspace for transpose function */ double abserr, /* absolute error */ abserr_tot; /* aggregate absolute error */ double epsilon = 1.e-8; /* error tolerance */ double local_trans_time, /* timing parameters */ trans_time, avgtime; MPI_Win rma_win = MPI_WIN_NULL; MPI_Info rma_winfo = MPI_INFO_NULL; int passive_target = 0; /* use passive target RMA sync */ #if MPI_VERSION >= 3 int flush_local = 1; /* flush local (or remote) after put */ int flush_bundle = 1; /* flush every <bundle> put calls */ #endif /********************************************************************* ** Initialize the MPI environment *********************************************************************/ MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_ID); MPI_Comm_size(MPI_COMM_WORLD, &Num_procs); /********************************************************************* ** process, test and broadcast input parameters *********************************************************************/ error = 0; if (my_ID == root) { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("MPIRMA matrix transpose: B = A^T\n"); if (argc <= 3){ printf("Usage: %s <# iterations> <matrix order> [Tile size]" "[sync (0=fence, 1=flush)] [flush local?] [flush bundle]\n", *argv); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); if(iterations < 1){ printf("ERROR: iterations must be >= 1 : %d \n",iterations); error = 1; goto ENDOFTESTS; } order = atol(*++argv); if (order < Num_procs) { printf("ERROR: matrix order %ld should at least # procs %d\n", order, Num_procs); error = 1; goto ENDOFTESTS; } if (order%Num_procs) { printf("ERROR: matrix order %ld should be divisible by # procs %d\n", order, Num_procs); error = 1; goto ENDOFTESTS; } if (argc >= 4) Tile_order = atoi(*++argv); if (argc >= 5) passive_target = atoi(*++argv); #if MPI_VERSION >= 3 if (argc >= 6) flush_local = atoi(*++argv); if (argc >= 7) flush_bundle = atoi(*++argv); #endif ENDOFTESTS:; } bail_out(error); if (my_ID == root) { printf("Number of ranks = %d\n", Num_procs); printf("Matrix order = %ld\n", order); printf("Number of iterations = %d\n", iterations); if ((Tile_order > 0) && (Tile_order < order)) printf("Tile size = %d\n", Tile_order); else printf("Untiled\n"); if (passive_target) { #if MPI_VERSION < 3 printf("Synchronization = MPI_Win_(un)lock\n"); #else printf("Synchronization = MPI_Win_flush%s (bundle=%d)\n", flush_local ? 
"_local" : "", flush_bundle); #endif } else { printf("Synchronization = MPI_Win_fence\n"); } } /* Broadcast input data to all ranks */ MPI_Bcast (&order, 1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast (&iterations, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast (&Tile_order, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast (&passive_target, 1, MPI_INT, root, MPI_COMM_WORLD); #if MPI_VERSION >= 3 MPI_Bcast (&flush_local, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast (&flush_bundle, 1, MPI_INT, root, MPI_COMM_WORLD); #endif /* a non-positive tile size means no tiling of the local transpose */ tiling = (Tile_order > 0) && (Tile_order < order); bytes = 2 * sizeof(double) * order * order; /********************************************************************* ** The matrix is broken up into column blocks that are mapped one to a ** rank. Each column block is made up of Num_procs smaller square ** blocks of order block_order. *********************************************************************/ Block_order = order/Num_procs; colstart = Block_order * my_ID; Colblock_size = order * Block_order; Block_size = Block_order * Block_order; /* debug message size effects */ if (my_ID == root) { printf("Block_size = %ld\n", Block_size); } /********************************************************************* ** Create the column block of the test matrix, the row block of the ** transposed matrix, and workspace (workspace only if #procs>1) *********************************************************************/ A_p = (double *)prk_malloc(Colblock_size*sizeof(double)); if (A_p == NULL){ printf(" Error allocating space for original matrix on node %d\n",my_ID); error = 1; } bail_out(error); MPI_Info_create (&rma_winfo); MPI_Info_set (rma_winfo, "no locks", "true"); B_p = (double *)prk_malloc(Colblock_size*sizeof(double)); if (B_p == NULL){ printf(" Error allocating space for transpose matrix on node %d\n",my_ID); error = 1; } bail_out(error); if (Num_procs>1) { Work_out_p = (double *) prk_malloc(Block_size*(Num_procs-1)*sizeof(double)); if (Work_out_p == NULL){ printf(" Error allocating space for work_out on node %d\n",my_ID); error = 1; } bail_out(error); PRK_Win_allocate(Block_size*(Num_procs-1)*sizeof(double), sizeof(double), rma_winfo, MPI_COMM_WORLD, &Work_in_p, &rma_win); if (Work_in_p == NULL){ printf(" Error allocating space for work on node %d\n",my_ID); error = 1; } bail_out(error); } #if MPI_VERSION >= 3 if (passive_target && Num_procs>1) { MPI_Win_lock_all(MPI_MODE_NOCHECK,rma_win); } #endif /* Fill the original column matrix */ istart = 0; for (j=0;j<Block_order;j++) { for (i=0;i<order; i++) { A(i,j) = (double) (order*(j+colstart) + i); B(i,j) = 0.0; } } MPI_Barrier(MPI_COMM_WORLD); for (iter = 0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_trans_time = wtime(); } /* do the local transpose */ istart = colstart; if (!tiling) { for (i=0; i<Block_order; i++) { for (j=0; j<Block_order; j++) { B(j,i) += A(i,j); A(i,j) += 1.0; } } } else { for (i=0; i<Block_order; i+=Tile_order) { for (j=0; j<Block_order; j+=Tile_order) { for (it=i; it<MIN(Block_order,i+Tile_order); it++) { for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) { B(jt,it) += A(it,jt); A(it,jt) += 1.0; } } } } } if (!passive_target && Num_procs>1) { MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPRECEDE, rma_win); } for (phase=1; phase<Num_procs; phase++){ send_to = (my_ID - phase + Num_procs)%Num_procs; istart = send_to*Block_order; if (!tiling) { for (i=0; i<Block_order; i++) 
{ for (j=0; j<Block_order; j++) { Work_out(phase-1,j,i) = A(i,j); A(i,j) += 1.0; } } } else { for (i=0; i<Block_order; i+=Tile_order) { for (j=0; j<Block_order; j+=Tile_order) { for (it=i; it<MIN(Block_order,i+Tile_order); it++) { for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) { Work_out(phase-1,jt,it) = A(it,jt); A(it,jt) += 1.0; } } } } } #if MPI_VERSION < 3 if (passive_target) { MPI_Win_lock(MPI_LOCK_SHARED, send_to, MPI_MODE_NOCHECK, rma_win); } #endif MPI_Put(Work_out_p+Block_size*(phase-1), Block_size, MPI_DOUBLE, send_to, Block_size*(phase-1), Block_size, MPI_DOUBLE, rma_win); if (passive_target) { #if MPI_VERSION < 3 MPI_Win_unlock(send_to, rma_win); #else if (flush_bundle==1) { if (flush_local==1) { MPI_Win_flush_local(send_to, rma_win); } else { MPI_Win_flush(send_to, rma_win); } } else if ( (phase%flush_bundle) == 0) { /* Too lazy to record all targets, so let MPI do it internally (hopefully) */ if (flush_local==1) { MPI_Win_flush_local_all(rma_win); } else { MPI_Win_flush_all(rma_win); } } #endif } } /* end of phase loop for puts */ if (Num_procs>1) { if (passive_target) { #if MPI_VERSION >= 3 MPI_Win_flush_all(rma_win); #endif MPI_Barrier(MPI_COMM_WORLD); } else { MPI_Win_fence(MPI_MODE_NOSTORE, rma_win); } } for (phase=1; phase<Num_procs; phase++) { recv_from = (my_ID + phase)%Num_procs; istart = recv_from*Block_order; /* scatter received block to transposed matrix; no need to tile */ for (j=0; j<Block_order; j++) { for (i=0; i<Block_order; i++) { B(i,j) += Work_in(phase-1,i,j); } } } /* end of phase loop for scatters */ /* for the flush case we need to make sure we have consumed Work_in before overwriting it in the next iteration */ if (Num_procs>1 && passive_target) { MPI_Barrier(MPI_COMM_WORLD); } } /* end of iterations */ local_trans_time = wtime() - local_trans_time; MPI_Reduce(&local_trans_time, &trans_time, 1, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD); abserr = 0.0; istart = 0; double addit = ((double)(iterations+1) * (double) (iterations))/2.0; for (j=0;j<Block_order;j++) { for (i=0;i<order; i++) { abserr += ABS(B(i,j) - ((double)(order*i + j+colstart)*(iterations+1)+addit)); } } MPI_Reduce(&abserr, &abserr_tot, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD); if (my_ID == root) { if (abserr_tot < epsilon) { printf("Solution validates\n"); avgtime = trans_time/(double)iterations; printf("Rate (MB/s): %lf Avg time (s): %lf\n",1.0E-06*bytes/avgtime, avgtime); } else { printf("ERROR: Aggregate absolute error %lf exceeds threshold %e\n", abserr_tot, epsilon); error = 1; } } bail_out(error); if (rma_win!=MPI_WIN_NULL) { #if MPI_VERSION >=3 if (passive_target) { MPI_Win_unlock_all(rma_win); } #endif PRK_Win_free(&rma_win); } MPI_Finalize(); exit(EXIT_SUCCESS); } /* end of main */
void IMB_rma_compare_and_swap (struct comm_info* c_info, int size, struct iter_schedule* iterations, MODES run_mode, double* time) { double res_time = -1.; int root = c_info->pair1; int s_size; int i; void *comp_b, *orig_b, *res_b; MPI_Datatype data_type = MPI_INT; ierr = 0; if (c_info->rank < 0) { *time = res_time; return; } MPI_Type_size(data_type,&s_size); for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator); if (c_info->rank == c_info->pair0) { /* use r_buffer for all buffers required by compare_and_swap, because * on all ranks r_buffer is zero-initialized in IMB_set_buf function */ orig_b = (char*)c_info->r_buffer + s_size*2; comp_b = (char*)c_info->r_buffer + s_size; res_b = c_info->r_buffer; MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN); if (run_mode->AGGREGATE) { res_time = MPI_Wtime(); for (i = 0; i < iterations->n_sample; i++) { ierr = MPI_Compare_and_swap( (char*)orig_b + i%iterations->r_cache_iter*iterations->r_offs, (char*)comp_b + i%iterations->r_cache_iter*iterations->r_offs, (char*)res_b + i%iterations->r_cache_iter*iterations->r_offs, data_type, root, i%iterations->r_cache_iter*iterations->r_offs, c_info->WIN ); MPI_ERRHAND(ierr); } ierr = MPI_Win_flush(root, c_info->WIN); res_time = (MPI_Wtime() - res_time)/iterations->n_sample; } else if ( !run_mode->AGGREGATE ) { res_time = MPI_Wtime(); for (i = 0; i < iterations->n_sample; i++) { ierr = MPI_Compare_and_swap( (char*)orig_b + i%iterations->s_cache_iter*iterations->s_offs, (char*)comp_b + i%iterations->s_cache_iter*iterations->s_offs, (char*)res_b + i%iterations->r_cache_iter*iterations->r_offs, data_type, root, i%iterations->r_cache_iter*iterations->r_offs, c_info->WIN ); MPI_ERRHAND(ierr); ierr = MPI_Win_flush(root, c_info->WIN); MPI_ERRHAND(ierr); } res_time = (MPI_Wtime() - res_time)/iterations->n_sample; } MPI_Win_unlock(root, c_info->WIN); } MPI_Barrier(c_info->communicator); *time = res_time; return; }
void run_rma_test(int nprocs_per_node)
{
  int myrank, nprocs;
  int mem_rank;
  MPI_Win win;
  int *baseptr;
  MPI_Aint local_size;

  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  if (nprocs < nprocs_per_node * 2) {
    if (!myrank)
      printf("should start program with at least %d processes\n", nprocs_per_node * 2);
    MPI_Finalize();
    exit(EXIT_FAILURE);
  }

  mem_rank = nprocs_per_node + nprocs_per_node / 2;
  local_size = (myrank == mem_rank) ? COUNT : 0;

  MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  MPI_Win_lock_all(0, win);

  int type_size;
  MPI_Type_size(MPI_INT, &type_size);

  size_t nbytes = COUNT * type_size;
  assert(MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &baseptr) == MPI_SUCCESS);
  assert(MPI_Win_attach(win, baseptr, nbytes) == MPI_SUCCESS);

  MPI_Aint ldisp;
  MPI_Aint *disps = malloc(nprocs * sizeof(MPI_Aint));

  assert(MPI_Get_address(baseptr, &ldisp) == MPI_SUCCESS);
  /* each rank contributes one displacement, so the receive count per rank is 1 */
  assert(MPI_Allgather(&ldisp, 1, MPI_AINT, disps, 1, MPI_AINT,
                       MPI_COMM_WORLD) == MPI_SUCCESS);

  if (myrank == 0) {
    for (size_t idx = 0; idx < COUNT; ++idx) {
      baseptr[idx] = idx * COUNT + 1;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (myrank == mem_rank) {
    assert(MPI_Get(baseptr, COUNT, MPI_INT, 0, disps[0], COUNT, MPI_INT,
                   win) == MPI_SUCCESS);
    assert(MPI_Win_flush(0, win) == MPI_SUCCESS);

    for (size_t idx = 0; idx < COUNT; ++idx) {
      assert(baseptr[idx] == idx * COUNT + 1);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Win_unlock_all(win);

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Win_free(&win);
  MPI_Free_mem(baseptr);
  free(disps);

  printf("Test finished\n");
}
void IMB_rma_accumulate (struct comm_info* c_info, int size, struct iter_schedule* iterations, MODES run_mode, double* time) { double res_time = -1.; Type_Size s_size,r_size; int s_num, r_num; /* IMB 3.1 << */ int r_off; int i; int root = c_info->pair1; ierr = 0; if (c_info->rank < 0) { *time = res_time; return; } MPI_Type_size(c_info->red_data_type,&s_size); s_num=size/s_size; r_size=s_size; r_num=s_num; r_off=iterations->r_offs/r_size; for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator); if (c_info->rank == c_info->pair0) { MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN); if (run_mode->AGGREGATE) { res_time = MPI_Wtime(); for (i = 0; i < iterations->n_sample; i++) { ierr = MPI_Accumulate( (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs, s_num, c_info->red_data_type, root, i%iterations->r_cache_iter*r_off, r_num, c_info->red_data_type, c_info->op_type, c_info->WIN ); MPI_ERRHAND(ierr); } ierr = MPI_Win_flush(root, c_info->WIN); res_time = (MPI_Wtime() - res_time)/iterations->n_sample; } else if ( !run_mode->AGGREGATE ) { res_time = MPI_Wtime(); for (i = 0; i < iterations->n_sample; i++) { ierr = MPI_Accumulate( (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs, s_num, c_info->red_data_type, root, i%iterations->r_cache_iter*r_off, r_num, c_info->red_data_type, c_info->op_type, c_info->WIN ); MPI_ERRHAND(ierr); ierr = MPI_Win_flush(root, c_info->WIN); MPI_ERRHAND(ierr); } res_time = (MPI_Wtime() - res_time)/iterations->n_sample; } MPI_Win_unlock(root, c_info->WIN); } MPI_Barrier(c_info->communicator); *time = res_time; return; }
int main(int argc, char *argv[]) { int rank, size, i, j, k; int errors = 0; int origin_shm, origin_am, dest; int *orig_buf = NULL, *result_buf = NULL, *compare_buf = NULL, *target_buf = NULL, *check_buf = NULL; MPI_Win win; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size != 3) { /* run this test with three processes */ goto exit_test; } /* this works when MPIR_PARAM_CH3_ODD_EVEN_CLIQUES is set */ dest = 2; origin_shm = 0; origin_am = 1; if (rank != dest) { MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &orig_buf); MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &result_buf); MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &compare_buf); } MPI_Win_allocate(sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &target_buf, &win); for (k = 0; k < LOOP_SIZE; k++) { /* init buffers */ if (rank == origin_shm) { orig_buf[0] = 1; compare_buf[0] = 0; result_buf[0] = 0; } else if (rank == origin_am) { orig_buf[0] = 0; compare_buf[0] = 1; result_buf[0] = 0; } else { MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win); target_buf[0] = 0; MPI_Win_unlock(rank, win); } MPI_Barrier(MPI_COMM_WORLD); /* perform FOP */ MPI_Win_lock_all(0, win); if (rank != dest) { MPI_Compare_and_swap(orig_buf, compare_buf, result_buf, MPI_INT, dest, 0, win); MPI_Win_flush(dest, win); } MPI_Win_unlock_all(win); MPI_Barrier(MPI_COMM_WORLD); /* check results */ if (rank != dest) { MPI_Gather(result_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD); } else { MPI_Alloc_mem(sizeof(int) * 3, MPI_INFO_NULL, &check_buf); MPI_Gather(target_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD); if (!(check_buf[dest] == 0 && check_buf[origin_shm] == 0 && check_buf[origin_am] == 1) && !(check_buf[dest] == 1 && check_buf[origin_shm] == 0 && check_buf[origin_am] == 0)) { printf ("Wrong results: target result = %d, origin_shm result = %d, origin_am result = %d\n", check_buf[dest], check_buf[origin_shm], check_buf[origin_am]); printf ("Expected results (1): target result = 1, origin_shm result = 0, origin_am result = 0\n"); printf ("Expected results (2): target result = 0, origin_shm result = 0, origin_am result = 1\n"); errors++; } MPI_Free_mem(check_buf); } } MPI_Win_free(&win); if (rank == origin_am || rank == origin_shm) { MPI_Free_mem(orig_buf); MPI_Free_mem(result_buf); MPI_Free_mem(compare_buf); } exit_test: if (rank == dest && errors == 0) printf(" No Errors\n"); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int i, rank, nproc; int shm_rank, shm_nproc; MPI_Aint size; int errors = 0, all_errors = 0; int **bases = NULL, *my_base = NULL; int disp_unit; MPI_Win shm_win = MPI_WIN_NULL, win = MPI_WIN_NULL; MPI_Comm shm_comm = MPI_COMM_NULL; MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL; int dst_shm_rank, dst_world_rank; MPI_Info create_info = MPI_INFO_NULL; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm); MPI_Comm_rank(shm_comm, &shm_rank); MPI_Comm_size(shm_comm, &shm_nproc); /* Platform does not support shared memory, just return. */ if (shm_nproc < 2) { goto exit; } /* Specify the last process in the node as the target process */ dst_shm_rank = shm_nproc - 1; MPI_Comm_group(shm_comm, &shm_group); MPI_Comm_group(MPI_COMM_WORLD, &world_group); MPI_Group_translate_ranks(shm_group, 1, &dst_shm_rank, world_group, &dst_world_rank); bases = calloc(shm_nproc, sizeof(int *)); /* Allocate shm window among local processes, then create a global window with * those shm window buffers */ MPI_Win_allocate_shared(sizeof(int) * ELEM_PER_PROC, sizeof(int), MPI_INFO_NULL, shm_comm, &my_base, &shm_win); if (verbose) printf("%d -- allocate shared: my_base = %p, absolute base\n", shm_rank, my_base); for (i = 0; i < shm_nproc; i++) { MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &bases[i]); if (verbose) printf("%d -- shared query: base[%d]=%p, size %ld, unit %d\n", shm_rank, i, bases[i], size, disp_unit); } #ifdef USE_INFO_ALLOC_SHM MPI_Info_create(&create_info); MPI_Info_set(create_info, "alloc_shm", "true"); #else create_info = MPI_INFO_NULL; #endif MPI_Win_create(my_base, sizeof(int) * ELEM_PER_PROC, sizeof(int), create_info, MPI_COMM_WORLD, &win); /* Reset data */ for (i = 0; i < ELEM_PER_PROC; i++) { my_base[i] = 0; local_buf[i] = i + 1; } /* Do RMA through global window, then check value through shared window */ MPI_Win_lock_all(0, win); MPI_Win_lock_all(0, shm_win); if (shm_rank == 0) { MPI_Put(&local_buf[0], 1, MPI_INT, dst_world_rank, 0, 1, MPI_INT, win); MPI_Put(&local_buf[ELEM_PER_PROC - 1], 1, MPI_INT, dst_world_rank, ELEM_PER_PROC - 1, 1, MPI_INT, win); MPI_Win_flush(dst_world_rank, win); } MPI_Win_sync(shm_win); MPI_Barrier(shm_comm); MPI_Win_sync(shm_win); if (bases[dst_shm_rank][0] != local_buf[0]) { errors++; printf("%d -- Got %d at rank %d index %d, expected %d\n", rank, bases[dst_shm_rank][0], dst_shm_rank, 0, local_buf[0]); } if (bases[dst_shm_rank][ELEM_PER_PROC - 1] != local_buf[ELEM_PER_PROC - 1]) { errors++; printf("%d -- Got %d at rank %d index %d, expected %d\n", rank, bases[dst_shm_rank][ELEM_PER_PROC - 1], dst_shm_rank, ELEM_PER_PROC - 1, local_buf[ELEM_PER_PROC - 1]); } MPI_Win_unlock_all(shm_win); MPI_Win_unlock_all(win); MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Win_free(&win); MPI_Win_free(&shm_win); exit: if (rank == 0 && all_errors == 0) printf(" No Errors\n"); if (create_info != MPI_INFO_NULL) MPI_Info_free(&create_info); if (shm_comm != MPI_COMM_NULL) MPI_Comm_free(&shm_comm); if (shm_group != MPI_GROUP_NULL) MPI_Group_free(&shm_group); if (world_group != MPI_GROUP_NULL) MPI_Group_free(&world_group); MPI_Finalize(); if (bases) free(bases); return 0; }
dart_ret_t dart_waitall( dart_handle_t * handle, size_t n) { int i, r_n; int num_handles = (int)n; DART_LOG_DEBUG("dart_waitall()"); if (n == 0) { DART_LOG_ERROR("dart_waitall > number of handles = 0"); return DART_OK; } if (n > INT_MAX) { DART_LOG_ERROR("dart_waitall ! number of handles > INT_MAX"); return DART_ERR_INVAL; } DART_LOG_DEBUG("dart_waitall: number of handles: %d", num_handles); if (*handle) { MPI_Status *mpi_sta; MPI_Request *mpi_req; mpi_req = (MPI_Request *) malloc(num_handles * sizeof(MPI_Request)); mpi_sta = (MPI_Status *) malloc(num_handles * sizeof(MPI_Status)); /* * copy requests from DART handles to MPI request array: */ DART_LOG_TRACE("dart_waitall: copying DART handles to MPI request array"); r_n = 0; for (i = 0; i < num_handles; i++) { if (handle[i] != NULL) { DART_LOG_DEBUG("dart_waitall: -- handle[%d](%p): " "dest:%d win:%"PRIu64" req:%"PRIu64"", i, (void*)handle[i], handle[i]->dest, (uint64_t)handle[i]->win, (uint64_t)handle[i]->request); mpi_req[r_n] = handle[i]->request; r_n++; } } /* * wait for communication of MPI requests: */ DART_LOG_DEBUG("dart_waitall: MPI_Waitall, %d requests from %d handles", r_n, num_handles); /* From the MPI 3.1 standard: * * The i-th entry in array_of_statuses is set to the return * status of the i-th operation. Active persistent requests * are marked inactive. * Requests of any other type are deallocated and the * corresponding handles in the array are set to * MPI_REQUEST_NULL. * The list may contain null or inactive handles. * The call sets to empty the status of each such entry. */ if (r_n > 0) { if (MPI_Waitall(r_n, mpi_req, mpi_sta) == MPI_SUCCESS) { DART_LOG_DEBUG("dart_waitall: MPI_Waitall completed"); } else { DART_LOG_ERROR("dart_waitall: MPI_Waitall failed"); DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries"); free(mpi_req); DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries"); free(mpi_sta); return DART_ERR_INVAL; } } else { DART_LOG_DEBUG("dart_waitall > number of requests = 0"); return DART_OK; } /* * copy MPI requests back to DART handles: */ DART_LOG_TRACE("dart_waitall: copying MPI requests back to DART handles"); r_n = 0; for (i = 0; i < num_handles; i++) { if (handle[i]) { if (mpi_req[r_n] == MPI_REQUEST_NULL) { DART_LOG_TRACE("dart_waitall: -- mpi_req[%d] = MPI_REQUEST_NULL", r_n); } else { DART_LOG_TRACE("dart_waitall: -- mpi_req[%d] = %d", r_n, mpi_req[r_n]); } DART_LOG_TRACE("dart_waitall: -- mpi_sta[%d].MPI_SOURCE: %d", r_n, mpi_sta[r_n].MPI_SOURCE); DART_LOG_TRACE("dart_waitall: -- mpi_sta[%d].MPI_ERROR: %d:%s", r_n, mpi_sta[r_n].MPI_ERROR, DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR)); handle[i]->request = mpi_req[r_n]; r_n++; } } /* * wait for completion of MPI requests at origins and targets: */ DART_LOG_DEBUG("dart_waitall: waiting for remote completion"); for (i = 0; i < num_handles; i++) { if (handle[i]) { if (handle[i]->request == MPI_REQUEST_NULL) { DART_LOG_TRACE("dart_waitall: -- handle[%d] done (MPI_REQUEST_NULL)", i); } else { DART_LOG_DEBUG("dart_waitall: -- MPI_Win_flush(handle[%d]: %p))", i, (void*)handle[i]); DART_LOG_TRACE("dart_waitall: handle[%d]->dest: %d", i, handle[i]->dest); DART_LOG_TRACE("dart_waitall: handle[%d]->win: %"PRIu64"", i, (uint64_t)handle[i]->win); DART_LOG_TRACE("dart_waitall: handle[%d]->req: %"PRIu64"", i, (uint64_t)handle[i]->request); /* * MPI_Win_flush to wait for remote completion: */ if (MPI_Win_flush(handle[i]->dest, handle[i]->win) != MPI_SUCCESS) { DART_LOG_ERROR("dart_waitall: MPI_Win_flush failed"); DART_LOG_TRACE("dart_waitall: free 
MPI_Request temporaries"); free(mpi_req); DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries"); free(mpi_sta); return DART_ERR_INVAL; } DART_LOG_TRACE("dart_waitall: -- MPI_Request_free"); if (MPI_Request_free(&handle[i]->request) != MPI_SUCCESS) { DART_LOG_ERROR("dart_waitall: MPI_Request_free failed"); DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries"); free(mpi_req); DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries"); free(mpi_sta); return DART_ERR_INVAL; } } } } /* * free memory: */ DART_LOG_DEBUG("dart_waitall: free handles"); for (i = 0; i < num_handles; i++) { if (handle[i]) { /* Free handle resource */ DART_LOG_TRACE("dart_waitall: -- free handle[%d]: %p", i, (void*)(handle[i])); free(handle[i]); handle[i] = NULL; } } DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries"); free(mpi_req); DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries"); free(mpi_sta); } DART_LOG_DEBUG("dart_waitall > finished"); return DART_OK; }
/** * TODO: Check if MPI_Accumulate (REPLACE) can bring better performance? */ dart_ret_t dart_get_blocking( void * dest, dart_gptr_t gptr, size_t nbytes) { MPI_Win win; MPI_Aint disp_s, disp_rel; dart_unit_t target_unitid_abs = gptr.unitid; dart_unit_t target_unitid_rel = target_unitid_abs; uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; uint16_t index = gptr.flags; /* * MPI uses offset type int, do not copy more than INT_MAX elements: */ if (nbytes > INT_MAX) { DART_LOG_ERROR("dart_get_blocking ! failed: nbytes > INT_MAX"); return DART_ERR_INVAL; } if (seg_id) { unit_g2l(index, target_unitid_abs, &target_unitid_rel); } DART_LOG_DEBUG("dart_get_blocking() uid_abs:%d uid_rel:%d " "o:%"PRIu64" s:%d i:%u, nbytes:%zu", target_unitid_abs, target_unitid_rel, offset, seg_id, index, nbytes); #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) DART_LOG_DEBUG("dart_get_blocking: shared windows enabled"); if (seg_id >= 0) { int i; char * baseptr; /* * Use memcpy if the target is in the same node as the calling unit: * The value of i will be the target's relative ID in teamid. */ i = dart_sharedmem_table[index][gptr.unitid]; if (i >= 0) { DART_LOG_DEBUG("dart_get_blocking: shared memory segment, seg_id:%d", seg_id); if (seg_id) { if (dart_adapt_transtable_get_baseptr(seg_id, i, &baseptr) == -1) { DART_LOG_ERROR("dart_get_blocking ! " "dart_adapt_transtable_get_baseptr failed"); return DART_ERR_INVAL; } } else { baseptr = dart_sharedmem_local_baseptr_set[i]; } baseptr += offset; DART_LOG_DEBUG("dart_get_blocking: memcpy %zu bytes", nbytes); memcpy((char*)dest, baseptr, nbytes); return DART_OK; } } #else DART_LOG_DEBUG("dart_get_blocking: shared windows disabled"); #endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ /* * MPI shared windows disabled or target and calling unit are on different * nodes, use MPI_Rget: */ if (seg_id) { if (dart_adapt_transtable_get_disp( seg_id, target_unitid_rel, &disp_s) == -1) { DART_LOG_ERROR("dart_get_blocking ! " "dart_adapt_transtable_get_disp failed"); return DART_ERR_INVAL; } win = dart_win_lists[index]; disp_rel = disp_s + offset; DART_LOG_DEBUG("dart_get_blocking: nbytes:%zu " "source (coll.): win:%"PRIu64" unit:%d offset:%"PRIu64" " "-> dest: %p", nbytes, (uint64_t)win, target_unitid_rel, (uint64_t)disp_rel, dest); } else { win = dart_win_local_alloc; disp_rel = offset; DART_LOG_DEBUG("dart_get_blocking: nbytes:%zu " "source (local): win:%"PRIu64" unit:%d offset:%"PRIu64" " "-> dest: %p", nbytes, (uint64_t)win, target_unitid_rel, (uint64_t)disp_rel, dest); } /* * Using MPI_Get as MPI_Win_flush is required to ensure remote completion. */ DART_LOG_DEBUG("dart_get_blocking: MPI_Get"); if (MPI_Get(dest, nbytes, MPI_BYTE, target_unitid_rel, disp_rel, nbytes, MPI_BYTE, win) != MPI_SUCCESS) { DART_LOG_ERROR("dart_get_blocking ! MPI_Get failed"); return DART_ERR_INVAL; } DART_LOG_DEBUG("dart_get_blocking: MPI_Win_flush"); if (MPI_Win_flush(target_unitid_rel, win) != MPI_SUCCESS) { DART_LOG_ERROR("dart_get_blocking ! MPI_Win_flush failed"); return DART_ERR_INVAL; } DART_LOG_DEBUG("dart_get_blocking > finished"); return DART_OK; }