Example #1
void DO_OP_LOOP(int dst, int iter)
{
    int i, x;

    switch (OP_TYPE) {
    case OP_ACC:
        for (x = 0; x < iter; x++) {
            for (i = 0; i < NOP; i++)
                MPI_Accumulate(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE,
                               MPI_SUM, win);
            MPI_Win_flush(dst, win);
        }
        break;
    case OP_PUT:
        for (x = 0; x < iter; x++) {
            for (i = 0; i < NOP; i++)
                MPI_Put(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win);
            MPI_Win_flush(dst, win);
        }
        break;
    case OP_GET:
        for (x = 0; x < iter; x++) {
            for (i = 0; i < NOP; i++)
                MPI_Get(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win);
            MPI_Win_flush(dst, win);
        }
        break;
    }
}
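MPI_Win_flush is only valid inside a passive-target access epoch, so a routine like the one above is assumed to be called between a lock and an unlock on win. A minimal caller sketch (dst, iter and win as in the snippet; the choice of epoch is an assumption, not part of the original):

/* open a passive-target epoch covering all ranks of the window */
MPI_Win_lock_all(0, win);
DO_OP_LOOP(dst, iter);     /* issues NOP RMA operations per MPI_Win_flush call */
MPI_Win_unlock_all(win);   /* closing the epoch completes anything still outstanding */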
Example #2
int oshmpi_trylock(long * lockp)
{
  int is_locked = -1, nil = -1;
  oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp;
  lock->prev = -1;
  /* Get the last tail, if -1 replace with me */
  MPI_Compare_and_swap (&shmem_world_rank, &nil, &(lock->prev), MPI_INT,
			TAIL, TAIL_DISP, oshmpi_lock_win);
  MPI_Win_flush (TAIL, oshmpi_lock_win);
  /* Find if the last proc is holding lock */
  if (lock->prev != -1)
    {
      MPI_Fetch_and_op (NULL, &is_locked, MPI_INT, lock->prev,
			LOCK_DISP, MPI_NO_OP, oshmpi_lock_win);
      MPI_Win_flush (lock->prev, oshmpi_lock_win);

      if (is_locked)
	return 0;
    }
  /* Add myself in tail */
  MPI_Fetch_and_op (&shmem_world_rank, &(lock->prev), MPI_INT, TAIL,
		    TAIL_DISP, MPI_REPLACE, oshmpi_lock_win);
  MPI_Win_flush (TAIL, oshmpi_lock_win);
  /* Hold lock */
  oshmpi_lock_base[LOCK_DISP] = 1;
  MPI_Win_sync (oshmpi_lock_win);

  return 1;
}
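As written, oshmpi_trylock returns 1 once the caller holds the lock and 0 while the previous tail still holds it, so a caller may simply retry. A minimal spin sketch (the lock word my_lock is illustrative; oshmpi_unlock is shown in Example #14 below):

static long my_lock = 0;            /* symmetric lock word, OpenSHMEM style */

void critical_section_sketch(void)
{
    while (!oshmpi_trylock(&my_lock))
        ;                           /* retry until the swap on TAIL succeeds */
    /* ... critical section ... */
    oshmpi_unlock(&my_lock);        /* see Example #14 */
}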
Example #3
dart_ret_t dart_flush(
  dart_gptr_t gptr)
{
  MPI_Win     win;
  dart_unit_t target_unitid_abs;
  int16_t     seg_id = gptr.segid;
  target_unitid_abs  = gptr.unitid;
  DART_LOG_DEBUG("dart_flush() gptr: "
                 "unitid:%d offset:%"PRIu64" segid:%d index:%d",
                 gptr.unitid, gptr.addr_or_offs.offset,
                 gptr.segid,  gptr.flags);
  if (seg_id) {
    dart_unit_t target_unitid_rel;
    uint16_t    index = gptr.flags;
    win               = dart_win_lists[index];
    unit_g2l(index, target_unitid_abs, &target_unitid_rel);
    DART_LOG_TRACE("dart_flush: MPI_Win_flush");
    MPI_Win_flush(target_unitid_rel, win);
  } else {
    win = dart_win_local_alloc;
    DART_LOG_TRACE("dart_flush: MPI_Win_flush");
    MPI_Win_flush(target_unitid_abs, win);
  }
  DART_LOG_DEBUG("dart_flush > finished");
  return DART_OK;
}
Example #4
File: mcs-lock.c Project: coti/oshmpi
/** Lock a mutex.
 *
 * @param[in] hdl   Handle to the mutex
 * @return          MPI status
 */
int MCS_Mutex_lock(MCS_Mutex hdl)
{
	int prev;

	/* This store is safe, since it cannot happen concurrently with a remote
	 * write */
	hdl->base[MCS_MTX_ELEM_DISP] = -1;
	MPI_Win_sync(hdl->window);

	MPI_Fetch_and_op(&shmem_world_rank, &prev, MPI_INT, hdl->tail_rank, MCS_MTX_TAIL_DISP,
			MPI_REPLACE, hdl->window);
	MPI_Win_flush(hdl->tail_rank, hdl->window);

	/* If there was a previous tail, update their next pointer and wait for
	 * notification.  Otherwise, the mutex was successfully acquired. */
	if (prev != -1) {
		/* Wait for notification */
		MPI_Status status;

		MPI_Accumulate(&shmem_world_rank, 1, MPI_INT, prev, MCS_MTX_ELEM_DISP, 1, MPI_INT, MPI_REPLACE, hdl->window);
		MPI_Win_flush(prev, hdl->window);

		debug_print("%2d: LOCK   - waiting for notification from %d\n", shmem_world_rank, prev);
		MPI_Recv(NULL, 0, MPI_BYTE, prev, MCS_MUTEX_TAG, hdl->comm, &status);
	}

	debug_print("%2d: LOCK   - lock acquired\n", shmem_world_rank);

	return MPI_SUCCESS;
}
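A usage sketch pairing this with MCS_Mutex_unlock from Example #7 below; the handle is assumed to have been created elsewhere (e.g. by an MCS_Mutex_create routine that is not part of this excerpt):

MCS_Mutex hdl;
/* ... hdl created and initialized elsewhere ... */
MCS_Mutex_lock(hdl);      /* enqueue at the tail, then wait for the predecessor's notification */
/* ... critical section: exclusive access to the shared resource ... */
MCS_Mutex_unlock(hdl);    /* hand the lock to the next waiter, or reset the tail */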
Example #5
dart_ret_t dart_lock_acquire (dart_lock_t lock)
{
	dart_unit_t unitid;
	dart_team_myid (lock -> teamid, &unitid);

	if (lock -> is_acquired == 1)
	{
		printf ("Warning: LOCK	- %2d has acquired the lock already\n", unitid);
		return DART_OK;
	}

	dart_gptr_t gptr_tail;
	dart_gptr_t gptr_list;
	int32_t predecessor[1], result[1];

	MPI_Win win;
	MPI_Status status;

	DART_GPTR_COPY(gptr_tail, lock -> gptr_tail);
	DART_GPTR_COPY(gptr_list, lock -> gptr_list);

	uint64_t offset_tail = gptr_tail.addr_or_offs.offset;
	int16_t seg_id = gptr_list.segid;
	dart_unit_t tail = gptr_tail.unitid;
	uint16_t index = gptr_list.flags;
	MPI_Aint disp_list;


	/* MPI-3 newly added feature: atomic operation*/
	MPI_Fetch_and_op (&unitid, predecessor, MPI_INT32_T, tail, offset_tail, MPI_REPLACE, dart_win_local_alloc);
	MPI_Win_flush (tail, dart_win_local_alloc);

	/* If there was a previous tail (predecessor), update the previous tail's next pointer with unitid
	 * and wait for notification from its predecessor. */
	if (*predecessor != -1)
	{
		if (dart_adapt_transtable_get_disp (seg_id, *predecessor, &disp_list) == -1)
		{
			return DART_ERR_INVAL;
		}
		win = dart_win_lists[index];

		/* Atomicity: Update its predecessor's next pointer */
		MPI_Fetch_and_op (&unitid, result, MPI_INT32_T, *predecessor, disp_list, MPI_REPLACE, win);

		MPI_Win_flush (*predecessor, win);

		/* Waiting for notification from its predecessor*/
		DART_LOG_DEBUG ("%2d: LOCK	- waiting for notification from %d in team %d",
				unitid, *predecessor, (lock -> teamid));

		MPI_Recv (NULL, 0, MPI_INT, *predecessor, 0, dart_teams[index], &status);
	}

	DART_LOG_DEBUG ("%2d: LOCK	- lock acquired in team %d", unitid, (lock -> teamid));
	lock -> is_acquired = 1;
	return DART_OK;
}
Example #6
File: mpi3_put.c Project: mnakao/pingpong
int main(int argc, char **argv){
  int i, me, target;
  unsigned int size;
  double t, t_max;
  MPI_Win win;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Win_create(&send_buf, sizeof(char)*MAX_SIZE, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  
  target = 1 - me;
  MPI_Win_lock_all(0, win);
  
  init_buf(send_buf, me);

  if(me==0) print_items();

  for(size=1;size<MAX_SIZE+1;size*=2){
    MPI_Barrier(MPI_COMM_WORLD);
    for(i=0;i<LOOP+WARMUP;i++){
      if(WARMUP == i)
	t = wtime();

      if(me == 0){
	MPI_Put(send_buf, size, MPI_CHAR, target, 0, size, MPI_CHAR, win);
	MPI_Win_flush_local(target, win);

	while(send_buf[0] == '0' || send_buf[size-1] == '0'){ MPI_Win_flush(me, win); }
	send_buf[0] = '0'; send_buf[size-1] = '0';
      }
      else {
	while(send_buf[0] == '1' || send_buf[size-1] == '1'){ MPI_Win_flush(me, win); }
	send_buf[0] = '1'; send_buf[size-1] = '1';

	MPI_Put(send_buf, size, MPI_CHAR, target, 0, size, MPI_CHAR, win);
	MPI_Win_flush_local(target, win);
      }
    } //end of LOOP

    t = wtime() - t;
    MPI_Reduce(&t, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if(me == 0)
      print_results(size, t_max);
  }

  MPI_Win_unlock_all(win);
  MPI_Win_free(&win);
  MPI_Finalize();
  return 0;
}
Example #7
File: mcs-lock.c Project: coti/oshmpi
/** Unlock a mutex.
 *
 * @param[in] hdl   Handle to the mutex
 * @return          MPI status
 */
int MCS_Mutex_unlock(MCS_Mutex hdl)
{
	int next;

	/* Read my next pointer.  FOP is used since another process may write to
	 * this location concurrent with this read. */
	MPI_Fetch_and_op(NULL, &next, MPI_INT, shmem_world_rank, 
			MCS_MTX_ELEM_DISP, MPI_NO_OP, hdl->window);
	MPI_Win_flush(shmem_world_rank, hdl->window);

	if ( next == -1) {
		int tail;
		int nil = -1;

		/* Check if we are the at the tail of the lock queue.  If so, we're
		 * done.  If not, we need to send notification. */
		MPI_Compare_and_swap(&nil, &shmem_world_rank, &tail, MPI_INT, hdl->tail_rank,
				MCS_MTX_TAIL_DISP, hdl->window);
		MPI_Win_flush(hdl->tail_rank, hdl->window);

		if (tail != shmem_world_rank) {
			debug_print("%2d: UNLOCK - waiting for next pointer (tail = %d)\n", shmem_world_rank, tail);
			assert(tail >= 0 && tail < shmem_world_size);

			for (;;) {
				int flag;

				MPI_Fetch_and_op(NULL, &next, MPI_INT, shmem_world_rank, MCS_MTX_ELEM_DISP,
						MPI_NO_OP, hdl->window);

				MPI_Win_flush(shmem_world_rank, hdl->window);
				if (next != -1) break;

                                /* Is this here just to poke progress?  If yes, then that is lame. */
				MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
			}
		}
	}

	/* Notify the next waiting process */
	if (next != -1) {
		debug_print("%2d: UNLOCK - notifying %d\n", shmem_world_rank, next);
		MPI_Send(NULL, 0, MPI_BYTE, next, MCS_MUTEX_TAG, hdl->comm);
	}

	debug_print("%2d: UNLOCK - lock released\n", shmem_world_rank);

	return MPI_SUCCESS;
}
Example #8
/*Run FOP with flush */
void run_fop_with_flush (int rank, WINDOW type)
{
    int i;
    MPI_Aint disp = 0;
    MPI_Win     win;

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    allocate_atomic_memory(rank, sbuf_original, rbuf_original,
                tbuf_original, NULL, (char **)&sbuf, (char **)&rbuf,
                (char **)&tbuf, NULL, (char **)&rbuf,  MAX_MSG_SIZE, type, &win);

    if(rank == 0) {
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }
        MPI_CHECK(MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win));
        for (i = 0; i < skip + loop; i++) {
            if (i == skip) {
                t_start = MPI_Wtime ();
            }
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_flush(1, win));
        }
        t_end = MPI_Wtime ();
        MPI_CHECK(MPI_Win_unlock(1, win));
    }                

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    print_latency(rank, 8);

    free_atomic_memory (sbuf, rbuf, tbuf, NULL, win, rank);
}
Example #9
double message_rate (long * buffer, int size, int iterations, int me, int pairs, int nxtpe, MPI_Win win)
{
	int64_t begin, end; 
	int i, offset;

	/*
	 * Touch memory
	 */
	memset(buffer, size, MAX_MSG_SZ * ITERS_LARGE * sizeof(long));

	MPI_Barrier(MPI_COMM_WORLD);
	
	if (me < pairs) {
		begin = TIME();

		for (i = 0, offset = 0; i < iterations; i++, offset++) {
			MPI_Put ((buffer + offset*size), size, MPI_LONG, nxtpe, offset*size, size, MPI_LONG, win);
			//MPI_Win_flush_local (nxtpe, win);
		}
		//MPI_Win_flush_all(win);
		MPI_Win_flush(nxtpe, win);
		end = TIME();

		return ((double)iterations * 1e6) / ((double)end - (double)begin);
	}
	return 0;
}
Example #10
static inline
void _wait_puts(const int target_rank, const MPI_Win win)
{
    if(_is_put_blocking){
      XACC_DEBUG("flush(%d) for [host|acc]", target_rank);
      MPI_Win_flush(target_rank, win);
    }else if(_is_put_local_blocking){
      XACC_DEBUG("flush_local(%d) for [host|acc]", target_rank);
      MPI_Win_flush_local(target_rank, win);
    }
}
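The two branches rely on the difference between remote and local completion: MPI_Win_flush completes the put at both origin and target, while MPI_Win_flush_local only guarantees that the origin buffer may be reused. A sketch of that distinction (buf, n, target and win are placeholders):

MPI_Put(buf, n, MPI_BYTE, target, 0, n, MPI_BYTE, win);
MPI_Win_flush_local(target, win);  /* buf may be reused; data not necessarily visible at the target yet */
MPI_Win_flush(target, win);        /* now the put is also complete at the target */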
Example #11
MTEST_THREAD_RETURN_TYPE run_test(void *arg)
{
    int i;

    for (i = 0; i < LOOPS; i++) {
        /* send a global variable, rather than a stack variable, so
         * other threads can access the address during flush */
        MPI_Put(&dummy, 1, MPI_INT, 0, 0, 1, MPI_INT, win);
        MPI_Win_flush(0, win);
    }

    return (MTEST_THREAD_RETURN_TYPE) NULL;
}
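Several threads are expected to run this loop concurrently against the same window, which requires the MPI library to have been initialized with MPI_THREAD_MULTIPLE. A sketch of the initialization such a test assumes (thread creation and window setup are omitted):

int main(int argc, char **argv)
{
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    if (provided < MPI_THREAD_MULTIPLE)
        MPI_Abort(MPI_COMM_WORLD, 1);   /* flushing from multiple threads needs full thread support */
    /* ... create win, spawn threads running run_test, join them, free win ... */
    MPI_Finalize();
    return 0;
}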
Example #12
void shmemx_ct_set(shmemx_ct_t ct, long value)
{
#ifdef ENABLE_SMP_OPTIMIZATIONS
    if (shmem_world_is_smp) {
        __sync_lock_test_and_set(ct,value);
    } else
#endif
    {
        shmem_offset_t win_offset = (ptrdiff_t)((intptr_t)ct - (intptr_t)shmem_sheap_base_ptr);
        MPI_Fetch_and_op(&value, NULL, MPI_LONG, shmem_world_rank, win_offset, MPI_REPLACE, shmem_sheap_win);
        MPI_Win_flush(shmem_world_rank, shmem_sheap_win);
    }
    return;
}
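The non-SMP branch writes the counter with an atomic MPI_REPLACE; the matching atomic read would use MPI_NO_OP, as the lock examples elsewhere in this list do. A sketch only, not necessarily how the corresponding shmemx_ct_get is implemented:

long value;
shmem_offset_t win_offset = (ptrdiff_t)((intptr_t)ct - (intptr_t)shmem_sheap_base_ptr);
MPI_Fetch_and_op(NULL, &value, MPI_LONG, shmem_world_rank, win_offset, MPI_NO_OP, shmem_sheap_win);
MPI_Win_flush(shmem_world_rank, shmem_sheap_win);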
Example #13
void oshmpi_lock(long * lockp)
{
  MPI_Status status;
  oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp;
  /* Replace myself with the last tail */
  MPI_Fetch_and_op (&shmem_world_rank, &(lock->prev), MPI_INT, TAIL,
		    TAIL_DISP, MPI_REPLACE, oshmpi_lock_win);
  MPI_Win_flush (TAIL, oshmpi_lock_win);

  /* Previous proc holding lock will eventually notify */
  if (lock->prev != -1)
    {
      /* Send my shmem_world_rank to previous proc's next */
      MPI_Accumulate (&shmem_world_rank, 1, MPI_INT, lock->prev, NEXT_DISP,
		      1, MPI_INT, MPI_REPLACE, oshmpi_lock_win);
      MPI_Win_flush (lock->prev, oshmpi_lock_win);
      MPI_Probe (lock->prev, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
    }
  /* Hold lock */
  oshmpi_lock_base[LOCK_DISP] = 1;
  MPI_Win_sync (oshmpi_lock_win);

  return;
}
Example #14
void oshmpi_unlock(long * lockp)
{
  oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp;
  /* Determine my next process */
  MPI_Fetch_and_op (NULL, &(lock->next), MPI_INT, shmem_world_rank,
		    NEXT_DISP, MPI_NO_OP, oshmpi_lock_win);
  MPI_Win_flush (shmem_world_rank, oshmpi_lock_win);

  if (lock->next != -1)
    {
      MPI_Send (&shmem_world_rank, 1, MPI_INT, lock->next, 999, MPI_COMM_WORLD);
    }
  /* Release lock */
  oshmpi_lock_base[LOCK_DISP] = -1;
  MPI_Win_sync (oshmpi_lock_win);

  return;
}
Example #15
dart_ret_t dart_wait(
  dart_handle_t handle)
{
  int mpi_ret;
  DART_LOG_DEBUG("dart_wait() handle:%p", (void*)(handle));
  if (handle != NULL) {
    DART_LOG_TRACE("dart_wait:     handle->dest: %d",
                   handle->dest);
    DART_LOG_TRACE("dart_wait:     handle->win:  %"PRIu64"",
                   (uint64_t)handle->win);
    DART_LOG_TRACE("dart_wait:     handle->req:  %d",
                   handle->request);
    if (handle->request != MPI_REQUEST_NULL) {
      MPI_Status mpi_sta;
      DART_LOG_DEBUG("dart_wait:     -- MPI_Wait");
      mpi_ret = MPI_Wait(&(handle->request), &mpi_sta);
      DART_LOG_TRACE("dart_wait:        -- mpi_sta.MPI_SOURCE: %d",
                     mpi_sta.MPI_SOURCE);
      DART_LOG_TRACE("dart_wait:        -- mpi_sta.MPI_ERROR:  %d:%s",
                     mpi_sta.MPI_ERROR,
                     DART__MPI__ERROR_STR(mpi_sta.MPI_ERROR));
      if (mpi_ret != MPI_SUCCESS) {
        DART_LOG_DEBUG("dart_wait ! MPI_Wait failed");
        return DART_ERR_INVAL;
      }
      DART_LOG_DEBUG("dart_wait:     -- MPI_Win_flush");
      mpi_ret = MPI_Win_flush(handle->dest, handle->win);
      if (mpi_ret != MPI_SUCCESS) {
        DART_LOG_DEBUG("dart_wait ! MPI_Win_flush failed");
        return DART_ERR_INVAL;
      }
    } else {
      DART_LOG_TRACE("dart_wait:     handle->request: MPI_REQUEST_NULL");
    }
    /* Free handle resource */
    DART_LOG_DEBUG("dart_wait:   free handle %p", (void*)(handle));
    free(handle);
    handle = NULL;
  }
  DART_LOG_DEBUG("dart_wait > finished");
  return DART_OK;
}
Example #16
File: mcs-lock.c Project: coti/oshmpi
/** Attempt to acquire a mutex.
 *
 * @param[in] hdl   Handle to the mutex
 * @param[out] success Indicates whether the mutex was acquired
 * @return          MPI status
 */
int MCS_Mutex_trylock(MCS_Mutex hdl, int *success)
{
	int tail, nil = -1;

	/* This store is safe, since it cannot happen concurrently with a remote
	 * write */
	hdl->base[MCS_MTX_ELEM_DISP] = -1;
	MPI_Win_sync(hdl->window);

	/* Check if the lock is available and claim it if it is. */
	MPI_Compare_and_swap(&shmem_world_rank, &nil, &tail, MPI_INT, hdl->tail_rank,
			MCS_MTX_TAIL_DISP, hdl->window);
	MPI_Win_flush(hdl->tail_rank, hdl->window);

	/* If the old tail was -1, we have claimed the mutex */
	*success = (tail == nil) ? 0 : 1;

	debug_print("%2d: TRYLOCK - %s\n", shmem_world_rank, (*success) ? "Success" : "Non-success");

	return MPI_SUCCESS;
}
Example #17
/*Run Get_accumulate with flush */
void run_get_acc_with_flush(int rank, WINDOW type)
{
    int size, i;
    MPI_Aint disp = 0;
    MPI_Win     win;

    for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : size + 1)) {
        allocate_memory(rank, rbuf, size, type, &win);

        if (type == WIN_DYNAMIC) {
            disp = sdisp_remote;
        }

        if(size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
        }

        if(rank == 0) {
            MPI_CHECK(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win));
            for (i = 0; i < skip + loop; i++) {
                if (i == skip) {
                    t_start = MPI_Wtime ();
                }
                MPI_CHECK(MPI_Get_accumulate(sbuf, size, MPI_CHAR, cbuf, size, MPI_CHAR, 1, disp, size,
                    MPI_CHAR, MPI_SUM, win));
                MPI_CHECK(MPI_Win_flush(1, win));
            }
            t_end = MPI_Wtime ();
            MPI_CHECK(MPI_Win_unlock(1, win));
        }                

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
        
        print_latency(rank, size);
        
        MPI_Win_free(&win);
    }
}
Example #18
dart_ret_t dart_lock_try_acquire (dart_lock_t lock, int32_t *is_acquired)
{
	dart_unit_t unitid;
	dart_team_myid (lock -> teamid, &unitid);
	if (lock -> is_acquired == 1)
	{
		printf ("Warning: TRYLOCK	- %2d has acquired the lock already\n", unitid);
		return DART_OK;
	}
	dart_gptr_t gptr_tail;

	int32_t result[1];
	int32_t compare[1] = {-1};

	DART_GPTR_COPY(gptr_tail, lock -> gptr_tail);
	dart_unit_t tail = gptr_tail.unitid;
	uint64_t offset = gptr_tail.addr_or_offs.offset;

	/* Atomicity: Check if the lock is available and claim it if it is. */
  MPI_Compare_and_swap (&unitid, compare, result, MPI_INT32_T, tail, offset, dart_win_local_alloc);
	MPI_Win_flush (tail, dart_win_local_alloc);

	/* If the old predecessor was -1, we will claim the lock, otherwise, do nothing. */
	if (*result == -1)
	{
		lock -> is_acquired = 1;
		*is_acquired = 1;
	}
	else
	{
		*is_acquired = 0;
	}
	DART_LOG_DEBUG("dart_lock_try_acquire: trylock %s in team %d",
                 ((*is_acquired) ? "succeeded" : "failed"),
                 (lock -> teamid));
	return DART_OK;
}
Example #19
File: garray.c Project: kpamnany/Garbo
/*  garray_put()
 */
int64_t garray_put(garray_t *ga, int64_t *lo, int64_t *hi, void *buf_)
{
    int64_t count = (hi[0] - lo[0]) + 1, length = count * ga->elem_size,
            tlonid, tloidx, thinid, thiidx, tnid, tidx, n, oidx = 0;
    int8_t *buf = (int8_t *)buf_;

    calc_target(ga, lo[0], &tlonid, &tloidx);
    calc_target(ga, hi[0], &thinid, &thiidx);

    /* is all data going to the same target? */
    if (tlonid == thinid) {
        LOG_DEBUG(ga->g->glog, "[%d] garray put %ld-%ld, single target %ld.%ld\n",
                  ga->g->nid, lo[0], hi[0], tlonid, tloidx);

        //MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tlonid, 0, ga->win);
        MPI_Put(buf, length, MPI_INT8_T,
                tlonid, (tloidx * ga->elem_size), length, MPI_INT8_T,
                ga->win);
        //MPI_Win_unlock(tlonid, ga->win);
        MPI_Win_flush(tlonid, ga->win);

        return 0;
    }

    /* put the data into the lo nid */
    n = ga->nelems_per_node + (tlonid < ga->nextra_elems ? 1 : 0) - tloidx;

    LOG_DEBUG(ga->g->glog, "[%d] garray putting %ld elements into %ld.%ld\n",
              ga->g->nid, n, tlonid, tloidx);

    //MPI_Win_lock(MPI_LOCK_SHARED, tlonid, 0, ga->win);
    MPI_Put(buf, length, MPI_INT8_T,
            tlonid, (tloidx * ga->elem_size), (n * ga->elem_size), MPI_INT8_T,
            ga->win);
    //MPI_Win_unlock(tlonid, ga->win);

    oidx = (n*ga->elem_size);

    /* put the data into the in-between nids */
    tidx = 0;
    for (tnid = tlonid + 1;  tnid < thinid;  ++tnid) {
        n = ga->nelems_per_node + (tnid < ga->nextra_elems ? 1 : 0);

        LOG_DEBUG(ga->g->glog, "[%d] garray putting %ld elements into %ld.%ld\n",
                  ga->g->nid, n, tnid, tidx);

        //MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tnid, 0, ga->win);
        MPI_Put(&buf[oidx], (n * ga->elem_size), MPI_INT8_T,
                tnid, 0, (n * ga->elem_size), MPI_INT8_T,
                ga->win);
        //MPI_Win_unlock(tnid, ga->win);

        oidx += (n*ga->elem_size);
    }

    /* put the data into the hi nid */
    n = thiidx + 1;

    LOG_DEBUG(ga->g->glog, "[%d] garray putting %ld elements up to %ld.%ld\n",
              ga->g->nid, n, thinid, thiidx);

    //MPI_Win_lock(MPI_LOCK_EXCLUSIVE, thinid, 0, ga->win);
    MPI_Put(&buf[oidx], (n * ga->elem_size), MPI_INT8_T,
            thinid, 0, (n * ga->elem_size), MPI_INT8_T,
            ga->win);
    //MPI_Win_unlock(thinid, ga->win);
    MPI_Win_flush_all(ga->win);

    return 0;
}
Example #20
int main(int argc, char *argv[])
{
    int rank, nproc, i, x;
    int errors = 0, all_errors = 0;
    MPI_Win win = MPI_WIN_NULL;

    MPI_Comm shm_comm = MPI_COMM_NULL;
    int shm_nproc, shm_rank;
    double **shm_bases = NULL, *my_base;
    MPI_Win shm_win = MPI_WIN_NULL;
    MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL;
    int *shm_ranks = NULL, *shm_ranks_in_world = NULL;
    MPI_Aint get_target_base_offsets = 0;

    int win_size = sizeof(double) * BUF_CNT;
    int new_win_size = win_size;
    int win_unit = sizeof(double);
    int shm_root_rank_in_world;
    int origin = -1, put_target, get_target;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);

    if (nproc != 4) {
        if (rank == 0)
            printf("Error: must be run with four processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);
    MPI_Comm_rank(shm_comm, &shm_rank);
    MPI_Comm_size(shm_comm, &shm_nproc);
    MPI_Comm_group(shm_comm, &shm_group);

    /* If the platform does not support shared memory, or the host file is wrong, just return. */
    if (shm_nproc != 2) {
        goto exit;
    }

    shm_bases = (double **) calloc(shm_nproc, sizeof(double *));
    shm_ranks = (int *) calloc(shm_nproc, sizeof(int));
    shm_ranks_in_world = (int *) calloc(shm_nproc, sizeof(int));

    if (shm_rank == 0)
        shm_root_rank_in_world = rank;
    MPI_Bcast(&shm_root_rank_in_world, 1, MPI_INT, 0, shm_comm);

    /* Identify ranks of target processes which are located on node 0 */
    if (rank == 0) {
        for (i = 0; i < shm_nproc; i++) {
            shm_ranks[i] = i;
        }
        MPI_Group_translate_ranks(shm_group, shm_nproc, shm_ranks, world_group, shm_ranks_in_world);
    }
    MPI_Bcast(shm_ranks_in_world, shm_nproc, MPI_INT, 0, MPI_COMM_WORLD);

    put_target = shm_ranks_in_world[shm_nproc - 1];
    get_target = shm_ranks_in_world[0];

    /* Identify the rank of the origin process, which is located on node 1 */
    if (shm_root_rank_in_world == 1 && shm_rank == 0) {
        origin = rank;
        if (verbose) {
            printf("----   I am origin = %d, get_target = %d, put_target = %d\n",
                   origin, get_target, put_target);
        }
    }

    /* Allocate shared memory among local processes */
    MPI_Win_allocate_shared(win_size, win_unit, MPI_INFO_NULL, shm_comm, &my_base, &shm_win);

    if (shm_root_rank_in_world == 0 && verbose) {
        MPI_Aint size;
        int disp_unit;
        for (i = 0; i < shm_nproc; i++) {
            MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &shm_bases[i]);
            printf("%d --    shared query: base[%d]=%p, size %zd, "
                   "unit %d\n", rank, i, shm_bases[i], size, disp_unit);
        }
    }

    /* Get offset of put target(1) on get target(0) */
    get_target_base_offsets = (shm_nproc - 1) * win_size / win_unit;

    if (origin == rank && verbose)
        printf("%d --    base_offset of put_target %d on get_target %d: %zd\n",
               rank, put_target, get_target, get_target_base_offsets);

    /* Create the window using MPI_Win_create(). Note that the window size on get_target(0)
     * equals the total size of the shm segments on this node, so the get_target process
     * can read the bytes located on the put_target process. */
    for (i = 0; i < BUF_CNT; i++) {
        local_buf[i] = (i + 1) * 1.0;
        my_base[i] = 0.0;
    }

    if (get_target == rank)
        new_win_size = win_size * shm_nproc;

    MPI_Win_create(my_base, new_win_size, win_unit, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (verbose)
        printf("%d --    new window my_base %p, size %d\n", rank, my_base, new_win_size);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Check if flush guarantees the completion of put operations on the target side.
     *
     * P exclusively locks 2 processes whose windows are shared with each other.
     * P first puts to and flushes one process, then gets the updated data from the other
     * process. If flush returns before the operations are done on the target side, the
     * data may be incorrect. */
    for (x = 0; x < ITER; x++) {
        for (i = 0; i < BUF_CNT; i++) {
            local_buf[i] += x;
            check_buf[i] = 0;
        }

        if (rank == origin) {
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, put_target, 0, win);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, get_target, 0, win);

            for (i = 0; i < BUF_CNT; i++) {
                MPI_Put(&local_buf[i], 1, MPI_DOUBLE, put_target, i, 1, MPI_DOUBLE, win);
            }
            MPI_Win_flush(put_target, win);

            MPI_Get(check_buf, BUF_CNT, MPI_DOUBLE, get_target,
                    get_target_base_offsets, BUF_CNT, MPI_DOUBLE, win);
            MPI_Win_flush(get_target, win);

            for (i = 0; i < BUF_CNT; i++) {
                if (check_buf[i] != local_buf[i]) {
                    printf("%d(iter %d) - Got check_buf[%d] = %.1lf, expected %.1lf\n",
                           rank, x, i, check_buf[i], local_buf[i]);
                    errors++;
                }
            }

            MPI_Win_unlock(put_target, win);
            MPI_Win_unlock(get_target, win);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

  exit:

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    if (shm_bases)
        free(shm_bases);
    if (shm_ranks)
        free(shm_ranks);
    if (shm_ranks_in_world)
        free(shm_ranks_in_world);

    if (shm_win != MPI_WIN_NULL)
        MPI_Win_free(&shm_win);

    if (win != MPI_WIN_NULL)
        MPI_Win_free(&win);

    if (shm_comm != MPI_COMM_NULL)
        MPI_Comm_free(&shm_comm);

    if (shm_group != MPI_GROUP_NULL)
        MPI_Group_free(&shm_group);

    if (world_group != MPI_GROUP_NULL)
        MPI_Group_free(&world_group);

    MPI_Finalize();

    return 0;
}
Example #21
int main(int argc, char *argv[])
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int *buf = NULL, *winbuf = NULL;
    MPI_Win window;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc < 2) {
        if (rank == 0)
            printf("Error: must be run with two or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Alloc_mem(MAX_SIZE * sizeof(int), MPI_INFO_NULL, &buf);
    MPI_Alloc_mem(MAX_SIZE * sizeof(int), MPI_INFO_NULL, &winbuf);
    MPI_Win_create(winbuf, MAX_SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL,
                   MPI_COMM_WORLD, &window);

    MPI_Win_lock_all(0, window);

    /* Test Raccumulate local completion with small data.
     * Small data is always copied to header packet as immediate data. */
    if (rank == 1) {
        for (i = 0; i < ITER; i++) {
            MPI_Request acc_req;
            int val = -1;

            buf[0] = rank * i;
            MPI_Raccumulate(&buf[0], 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_MAX, window, &acc_req);
            MPI_Wait(&acc_req, MPI_STATUS_IGNORE);

            /* reset local buffer to check local completion */
            buf[0] = 0;
            MPI_Win_flush(0, window);

            MPI_Get(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window);
            MPI_Win_flush(0, window);

            if (val != rank * i) {
                printf("%d - Got %d in small Raccumulate test, expected %d (%d * %d)\n", rank, val,
                       rank * i, rank, i);
                errors++;
            }
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Test Raccumulate local completion with large data.
     * Large data is not suitable for the 1-copy optimization and is always sent
     * out from the user buffer. */
    if (rank == 1) {
        for (i = 0; i < ITER; i++) {
            MPI_Request acc_req;
            int val0 = -1, val1 = -1, val2 = -1;
            int j;

            /* initialize data */
            for (j = 0; j < MAX_SIZE; j++) {
                buf[j] = rank + j + i;
            }

            MPI_Raccumulate(buf, MAX_SIZE, MPI_INT, 0, 0, MAX_SIZE, MPI_INT, MPI_REPLACE, window,
                            &acc_req);
            MPI_Wait(&acc_req, MPI_STATUS_IGNORE);

            /* reset local buffer to check local completion */
            buf[0] = 0;
            buf[MAX_SIZE - 1] = 0;
            buf[MAX_SIZE / 2] = 0;
            MPI_Win_flush(0, window);

            /* get remote values which are modified in local buffer after wait */
            MPI_Get(&val0, 1, MPI_INT, 0, 0, 1, MPI_INT, window);
            MPI_Get(&val1, 1, MPI_INT, 0, MAX_SIZE - 1, 1, MPI_INT, window);
            MPI_Get(&val2, 1, MPI_INT, 0, MAX_SIZE / 2, 1, MPI_INT, window);
            MPI_Win_flush(0, window);

            if (val0 != rank + i) {
                printf("%d - Got %d in large Raccumulate test, expected %d\n", rank,
                       val0, rank + i);
                errors++;
            }
            if (val1 != rank + MAX_SIZE - 1 + i) {
                printf("%d - Got %d in large Raccumulate test, expected %d\n", rank,
                       val1, rank + MAX_SIZE - 1 + i);
                errors++;
            }
            if (val2 != rank + MAX_SIZE / 2 + i) {
                printf("%d - Got %d in large Raccumulate test, expected %d\n", rank,
                       val2, rank + MAX_SIZE / 2 + i);
                errors++;
            }
        }
    }

    MPI_Win_unlock_all(window);
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&window);
    if (buf)
        MPI_Free_mem(buf);
    if (winbuf)
        MPI_Free_mem(winbuf);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();

    return 0;
}
Example #22
int main( int argc, char *argv[] )
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int *buf;
    MPI_Win window;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc < 2) {
        if (rank == 0) printf("Error: must be run with two or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /** Create using MPI_Win_create() **/

    if (rank == 0) {
      MPI_Alloc_mem(4*sizeof(int), MPI_INFO_NULL, &buf);
      *buf = nproc-1;
    } else
      buf = NULL;

    MPI_Win_create(buf, 4*sizeof(int)*(rank == 0), 1, MPI_INFO_NULL, MPI_COMM_WORLD, &window);

    /* PROC_NULL Communication */
    {
        MPI_Request pn_req[4];
        int val[4], res;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, MPI_PROC_NULL, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, MPI_PROC_NULL, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, MPI_PROC_NULL, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, MPI_PROC_NULL, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, window);

    /* GET-ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request gacc_req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget_accumulate(&rank, 1, MPI_INT, &val, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &gacc_req);
        assert(gacc_req != MPI_REQUEST_NULL);
        MPI_Wait(&gacc_req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+PUT: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Rput(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Raccumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }
    MPI_Win_unlock(0, window);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait inside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);

        MPI_Win_unlock_all(window);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait outside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    /* Wait in a different epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_lock_all(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_unlock_all(window);
    }

    /* Wait in a fence epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_fence(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_fence(0, window);
    }

    MPI_Win_free(&window);
    if (buf) MPI_Free_mem(buf);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();

    return 0;
}
Example #23
int main(int argc, char ** argv)
{
  long Block_order;        /* number of columns owned by rank       */
  long Block_size;         /* size of a single block                */
  long Colblock_size;      /* size of column block                  */
  int Tile_order=32;       /* default Tile order                    */
  int tiling;              /* boolean: true if tiling is used       */
  int Num_procs;           /* number of ranks                       */
  long order;              /* order of overall matrix               */
  int send_to, recv_from;  /* ranks with which to communicate       */
  long bytes;              /* combined size of matrices             */
  int my_ID;               /* rank                                  */
  int root=0;              /* rank of root                          */
  int iterations;          /* number of times to do the transpose   */
  int i, j, it, jt, istart;/* dummies                               */
  int iter;                /* index of iteration                    */
  int phase;               /* phase inside staged communication     */
  int colstart;            /* starting column for owning rank       */
  int error;               /* error flag                            */
  double RESTRICT *A_p;    /* original matrix column block          */
  double RESTRICT *B_p;    /* transposed matrix column block        */
  double RESTRICT *Work_in_p;/* workspace for transpose function    */
  double RESTRICT *Work_out_p;/* workspace for transpose function   */
  double abserr,           /* absolute error                        */
         abserr_tot;       /* aggregate absolute error              */
  double epsilon = 1.e-8;  /* error tolerance                       */
  double local_trans_time, /* timing parameters                     */
         trans_time,
         avgtime;
  MPI_Win  rma_win = MPI_WIN_NULL;
  MPI_Info rma_winfo = MPI_INFO_NULL;
  int passive_target = 0;  /* use passive target RMA sync           */
#if MPI_VERSION >= 3
  int  flush_local  = 1;   /* flush local (or remote) after put     */
  int  flush_bundle = 1;   /* flush every <bundle> put calls        */
#endif

/*********************************************************************
** Initialize the MPI environment
*********************************************************************/
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_ID);
  MPI_Comm_size(MPI_COMM_WORLD, &Num_procs);

/*********************************************************************
** process, test and broadcast input parameters
*********************************************************************/
  error = 0;
  if (my_ID == root) {
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("MPIRMA matrix transpose: B = A^T\n");

    if (argc <= 3){
      printf("Usage: %s <# iterations> <matrix order> [Tile size] "
             "[sync (0=fence, 1=flush)] [flush local?] [flush bundle]\n",
             *argv);
      error = 1; goto ENDOFTESTS;
    }

    iterations  = atoi(*++argv);
    if(iterations < 1){
      printf("ERROR: iterations must be >= 1 : %d \n",iterations);
      error = 1; goto ENDOFTESTS;
    }

    order = atol(*++argv);
    if (order < Num_procs) {
      printf("ERROR: matrix order %ld should be at least # procs %d\n",
             order, Num_procs);
      error = 1; goto ENDOFTESTS;
    }
    if (order%Num_procs) {
      printf("ERROR: matrix order %ld should be divisible by # procs %d\n",
             order, Num_procs);
      error = 1; goto ENDOFTESTS;
    }

    if (argc >= 4) Tile_order     = atoi(*++argv);
    if (argc >= 5) passive_target = atoi(*++argv);
#if MPI_VERSION >= 3
    if (argc >= 6) flush_local    = atoi(*++argv);
    if (argc >= 7) flush_bundle   = atoi(*++argv);
#endif

    ENDOFTESTS:;
  }
  bail_out(error);

  if (my_ID == root) {
    printf("Number of ranks      = %d\n", Num_procs);
    printf("Matrix order         = %ld\n", order);
    printf("Number of iterations = %d\n", iterations);
    if ((Tile_order > 0) && (Tile_order < order))
          printf("Tile size            = %d\n", Tile_order);
    else  printf("Untiled\n");
    if (passive_target) {
#if MPI_VERSION < 3
        printf("Synchronization      = MPI_Win_(un)lock\n");
#else
        printf("Synchronization      = MPI_Win_flush%s (bundle=%d)\n", flush_local ? "_local" : "", flush_bundle);
#endif
    } else {
        printf("Synchronization      = MPI_Win_fence\n");
    }
  }

  /*  Broadcast input data to all ranks */
  MPI_Bcast (&order,          1, MPI_LONG, root, MPI_COMM_WORLD);
  MPI_Bcast (&iterations,     1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&Tile_order,     1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&passive_target, 1, MPI_INT,  root, MPI_COMM_WORLD);
#if MPI_VERSION >= 3
  MPI_Bcast (&flush_local,    1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&flush_bundle,   1, MPI_INT,  root, MPI_COMM_WORLD);
#endif

  /* a non-positive tile size means no tiling of the local transpose */
  tiling = (Tile_order > 0) && (Tile_order < order);
  bytes = 2 * sizeof(double) * order * order;

/*********************************************************************
** The matrix is broken up into column blocks that are mapped one to a
** rank.  Each column block is made up of Num_procs smaller square
** blocks of order block_order.
*********************************************************************/

  Block_order    = order/Num_procs;
  colstart       = Block_order * my_ID;
  Colblock_size  = order * Block_order;
  Block_size     = Block_order * Block_order;

  /* debug message size effects */
  if (my_ID == root) {
    printf("Block_size           = %ld\n", Block_size);
  }

/*********************************************************************
** Create the column block of the test matrix, the row block of the
** transposed matrix, and workspace (workspace only if #procs>1)
*********************************************************************/
  A_p = (double *)prk_malloc(Colblock_size*sizeof(double));
  if (A_p == NULL){
    printf(" Error allocating space for original matrix on node %d\n",my_ID);
    error = 1;
  }
  bail_out(error);

  MPI_Info_create (&rma_winfo);
  MPI_Info_set (rma_winfo, "no locks", "true");
  B_p = (double *)prk_malloc(Colblock_size*sizeof(double));
  if (B_p == NULL){
    printf(" Error allocating space for transpose matrix on node %d\n",my_ID);
    error = 1;
  }
  bail_out(error);

  if (Num_procs>1) {
    Work_out_p = (double *) prk_malloc(Block_size*(Num_procs-1)*sizeof(double));
    if (Work_out_p == NULL){
      printf(" Error allocating space for work_out on node %d\n",my_ID);
      error = 1;
    }
    bail_out(error);

    PRK_Win_allocate(Block_size*(Num_procs-1)*sizeof(double), sizeof(double),
                     rma_winfo, MPI_COMM_WORLD, &Work_in_p, &rma_win);
    if (Work_in_p == NULL){
      printf(" Error allocating space for work on node %d\n",my_ID);
      error = 1;
    }
    bail_out(error);
  }

#if MPI_VERSION >= 3
  if (passive_target && Num_procs>1) {
    MPI_Win_lock_all(MPI_MODE_NOCHECK,rma_win);
  }
#endif

  /* Fill the original column matrix                                                */
  istart = 0;
  for (j=0;j<Block_order;j++) {
    for (i=0;i<order; i++) {
      A(i,j) = (double) (order*(j+colstart) + i);
      B(i,j) = 0.0;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  for (iter = 0; iter<=iterations; iter++) {

    /* start timer after a warmup iteration                                        */
    if (iter == 1) {
      MPI_Barrier(MPI_COMM_WORLD);
      local_trans_time = wtime();
    }

    /* do the local transpose                                                     */
    istart = colstart;
    if (!tiling) {
      for (i=0; i<Block_order; i++) {
        for (j=0; j<Block_order; j++) {
          B(j,i) += A(i,j);
          A(i,j) += 1.0;
        }
      }
    } else {
      for (i=0; i<Block_order; i+=Tile_order) {
        for (j=0; j<Block_order; j+=Tile_order) {
          for (it=i; it<MIN(Block_order,i+Tile_order); it++) {
            for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) {
              B(jt,it) += A(it,jt);
              A(it,jt) += 1.0;
            }
          }
        }
      }
    }

    if (!passive_target && Num_procs>1) {
      MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPRECEDE, rma_win);
    }

    for (phase=1; phase<Num_procs; phase++){
      send_to = (my_ID - phase + Num_procs)%Num_procs;

      istart = send_to*Block_order;
      if (!tiling) {
        for (i=0; i<Block_order; i++) {
          for (j=0; j<Block_order; j++) {
            Work_out(phase-1,j,i) = A(i,j);
            A(i,j) += 1.0;
          }
        }
      } else {
        for (i=0; i<Block_order; i+=Tile_order) {
          for (j=0; j<Block_order; j+=Tile_order) {
            for (it=i; it<MIN(Block_order,i+Tile_order); it++) {
              for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) {
                Work_out(phase-1,jt,it) = A(it,jt);
                A(it,jt) += 1.0;
              }
            }
          }
        }
      }

#if MPI_VERSION < 3
      if (passive_target) {
          MPI_Win_lock(MPI_LOCK_SHARED, send_to, MPI_MODE_NOCHECK, rma_win);
      }
#endif
      MPI_Put(Work_out_p+Block_size*(phase-1), Block_size, MPI_DOUBLE, send_to,
              Block_size*(phase-1), Block_size, MPI_DOUBLE, rma_win);

      if (passive_target) {
#if MPI_VERSION < 3
        MPI_Win_unlock(send_to, rma_win);
#else
        if (flush_bundle==1) {
          if (flush_local==1) {
              MPI_Win_flush_local(send_to, rma_win);
          } else {
              MPI_Win_flush(send_to, rma_win);
          }
        } else if ( (phase%flush_bundle) == 0) {
          /* Too lazy to record all targets, so let MPI do it internally (hopefully) */
          if (flush_local==1) {
              MPI_Win_flush_local_all(rma_win);
          } else {
              MPI_Win_flush_all(rma_win);
          }
        }
#endif
      }
    }  /* end of phase loop for puts  */
    if (Num_procs>1) {
      if (passive_target) {
#if MPI_VERSION >= 3
          MPI_Win_flush_all(rma_win);
#endif
          MPI_Barrier(MPI_COMM_WORLD);
      } else {
          MPI_Win_fence(MPI_MODE_NOSTORE, rma_win);
      }
    }

    for (phase=1; phase<Num_procs; phase++) {
      recv_from = (my_ID + phase)%Num_procs;
      istart = recv_from*Block_order;
      /* scatter received block to transposed matrix; no need to tile */
      for (j=0; j<Block_order; j++) {
        for (i=0; i<Block_order; i++) {
          B(i,j) += Work_in(phase-1,i,j);
        }
      }
    } /* end of phase loop for scatters */

    /* for the flush case we need to make sure we have consumed Work_in
       before overwriting it in the next iteration                    */
    if (Num_procs>1 && passive_target) {
      MPI_Barrier(MPI_COMM_WORLD);
    }

  } /* end of iterations */

  local_trans_time = wtime() - local_trans_time;
  MPI_Reduce(&local_trans_time, &trans_time, 1, MPI_DOUBLE, MPI_MAX, root,
             MPI_COMM_WORLD);

  abserr = 0.0;
  istart = 0;
  double addit = ((double)(iterations+1) * (double) (iterations))/2.0;
  for (j=0;j<Block_order;j++) {
    for (i=0;i<order; i++) {
      abserr += ABS(B(i,j) - ((double)(order*i + j+colstart)*(iterations+1)+addit));
    }
  }

  MPI_Reduce(&abserr, &abserr_tot, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD);

  if (my_ID == root) {
    if (abserr_tot < epsilon) {
      printf("Solution validates\n");
      avgtime = trans_time/(double)iterations;
      printf("Rate (MB/s): %lf Avg time (s): %lf\n",1.0E-06*bytes/avgtime, avgtime);
    }
    else {
      printf("ERROR: Aggregate absolute error %lf exceeds threshold %e\n", abserr_tot, epsilon);
      error = 1;
    }
  }

  bail_out(error);

  if (rma_win!=MPI_WIN_NULL) {
#if MPI_VERSION >=3
    if (passive_target) {
      MPI_Win_unlock_all(rma_win);
    }
#endif
    PRK_Win_free(&rma_win);
  }

  MPI_Finalize();
  exit(EXIT_SUCCESS);

}  /* end of main */
Example #24
void IMB_rma_compare_and_swap (struct comm_info* c_info, int size,  
                               struct iter_schedule* iterations,
                               MODES run_mode, double* time)
{
    double res_time = -1.;
    int root = c_info->pair1;
    int s_size;
    int i;
    void *comp_b, *orig_b, *res_b; 
    MPI_Datatype data_type = MPI_INT;
    ierr = 0;
          
    if (c_info->rank < 0)
    {
        *time = res_time;
        return;
    }    
    
    MPI_Type_size(data_type,&s_size);
    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
    

    if (c_info->rank == c_info->pair0)
    {
        /* use r_buffer for all buffers required by compare_and_swap, because 
         * on all ranks r_buffer is zero-initialized in IMB_set_buf function */
        orig_b = (char*)c_info->r_buffer + s_size*2;
        comp_b = (char*)c_info->r_buffer + s_size;
        res_b  = c_info->r_buffer;
 
        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
        if (run_mode->AGGREGATE)
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Compare_and_swap(
                        (char*)orig_b + i%iterations->r_cache_iter*iterations->r_offs,
                        (char*)comp_b + i%iterations->r_cache_iter*iterations->r_offs,
                        (char*)res_b  + i%iterations->r_cache_iter*iterations->r_offs,
                        data_type, root, i%iterations->r_cache_iter*iterations->r_offs, 
                        c_info->WIN );
                MPI_ERRHAND(ierr);
            }
            ierr = MPI_Win_flush(root, c_info->WIN);
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }    
        else if ( !run_mode->AGGREGATE )    
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Compare_and_swap(
                        (char*)orig_b + i%iterations->s_cache_iter*iterations->s_offs,
                        (char*)comp_b + i%iterations->s_cache_iter*iterations->s_offs,
                        (char*)res_b  + i%iterations->r_cache_iter*iterations->r_offs,
                        data_type, root, i%iterations->r_cache_iter*iterations->r_offs,
                        c_info->WIN );
                MPI_ERRHAND(ierr);

                ierr = MPI_Win_flush(root, c_info->WIN);
                MPI_ERRHAND(ierr);
            }
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }
        MPI_Win_unlock(root, c_info->WIN);
    }
    MPI_Barrier(c_info->communicator);

    *time = res_time; 
    return;
}
Example #25
void run_rma_test(int nprocs_per_node)
{
  int myrank, nprocs;
  int mem_rank;
  MPI_Win win;
  int *baseptr;
  MPI_Aint local_size;

  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  if (nprocs < nprocs_per_node * 2)
  {
    if (!myrank) printf("should start program with at least %d processes\n", nprocs_per_node * 2);
    MPI_Finalize();
    exit(EXIT_FAILURE);
  }


  mem_rank = nprocs_per_node + nprocs_per_node / 2;

  local_size = (myrank == mem_rank) ? COUNT : 0;

  MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);

  MPI_Win_lock_all(0, win);



  int type_size;
  MPI_Type_size(MPI_INT, &type_size);

  size_t nbytes = COUNT * type_size;

  assert(MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &baseptr) == MPI_SUCCESS);
  assert(MPI_Win_attach(win, baseptr, nbytes) == MPI_SUCCESS);

  MPI_Aint ldisp;
  MPI_Aint *disps = malloc(nprocs * sizeof(MPI_Aint));

  assert(MPI_Get_address(baseptr, &ldisp) == MPI_SUCCESS);

  /* each rank contributes one displacement; the per-rank receive count is therefore 1 */
  assert(MPI_Allgather(&ldisp, 1, MPI_AINT, disps, 1, MPI_AINT, MPI_COMM_WORLD) == MPI_SUCCESS);

  if (myrank == 0)
  {
    for (size_t idx = 0; idx < COUNT; ++idx) {
      baseptr[idx] = idx * COUNT + 1;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (myrank == mem_rank) {
    assert(MPI_Get(baseptr, 10, MPI_INT, 0, disps[0], 10, MPI_INT, win) == MPI_SUCCESS);
    assert(MPI_Win_flush(0, win) == MPI_SUCCESS);

    for (size_t idx = 0; idx < COUNT; ++idx) {
      assert(baseptr[idx] == idx * 10 + 1);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_unlock_all(win);

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_free(&win);

  MPI_Free_mem(baseptr);

  printf("Test finished\n");
}
Example #26
void IMB_rma_accumulate (struct comm_info* c_info, int size,  
                         struct iter_schedule* iterations,
                         MODES run_mode, double* time)
{
    double res_time = -1.;
    Type_Size s_size,r_size;
    int s_num, r_num;
    /* IMB 3.1 << */
    int r_off;
    int i;
    int root = c_info->pair1;
    ierr = 0;
     
    if (c_info->rank < 0)
    {
        *time = res_time;
        return;
    }    
    
    MPI_Type_size(c_info->red_data_type,&s_size);
    s_num=size/s_size;
    r_size=s_size;
    r_num=s_num;
    r_off=iterations->r_offs/r_size;

    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

    if (c_info->rank == c_info->pair0)
    {
        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
        if (run_mode->AGGREGATE)
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Accumulate(
                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
                        s_num, c_info->red_data_type, root, 
                        i%iterations->r_cache_iter*r_off, r_num, 
                        c_info->red_data_type, c_info->op_type, c_info->WIN );
                MPI_ERRHAND(ierr);
            }
            ierr = MPI_Win_flush(root, c_info->WIN);
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }    
        else if ( !run_mode->AGGREGATE )    
        {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++)
            {
                ierr = MPI_Accumulate(
                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
                        s_num, c_info->red_data_type, root, 
                        i%iterations->r_cache_iter*r_off, r_num, 
                        c_info->red_data_type, c_info->op_type, c_info->WIN );
                MPI_ERRHAND(ierr);

                ierr = MPI_Win_flush(root, c_info->WIN);
                MPI_ERRHAND(ierr);
            }
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }
        MPI_Win_unlock(root, c_info->WIN);
    }
    MPI_Barrier(c_info->communicator);

    *time = res_time; 
    return;
}    
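The two timed branches above differ only in where MPI_Win_flush is placed; a condensed sketch of both flush strategies in plain MPI, assuming the caller already holds a passive-target lock on the window (src, count, target and n_ops are placeholders):

/* Sketch of the two flush strategies measured above (not IMB code).
 * An MPI_Win_lock / MPI_Win_lock_all epoch is assumed to be open on win. */
#include <mpi.h>

void accumulate_aggregate(const double *src, int count, int target,
                          int n_ops, MPI_Win win)
{
    /* Aggregate mode: issue all operations, then flush once. */
    for (int i = 0; i < n_ops; i++)
        MPI_Accumulate(src, count, MPI_DOUBLE, target, 0, count, MPI_DOUBLE,
                       MPI_SUM, win);
    MPI_Win_flush(target, win);            /* completes all n_ops operations */
}

void accumulate_per_op(const double *src, int count, int target,
                       int n_ops, MPI_Win win)
{
    /* Non-aggregate mode: flush after every single operation. */
    for (int i = 0; i < n_ops; i++) {
        MPI_Accumulate(src, count, MPI_DOUBLE, target, 0, count, MPI_DOUBLE,
                       MPI_SUM, win);
        MPI_Win_flush(target, win);
    }
}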
Example #27
int main(int argc, char *argv[])
{
    int rank, size, i, j, k;
    int errors = 0;
    int origin_shm, origin_am, dest = 2;   /* dest initialized so the exit_test check is defined when size != 3 */
    int *orig_buf = NULL, *result_buf = NULL, *compare_buf = NULL,
        *target_buf = NULL, *check_buf = NULL;
    MPI_Win win;
    MPI_Status status;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 3) {
        /* run this test with three processes */
        goto exit_test;
    }

    /* this works when MPIR_PARAM_CH3_ODD_EVEN_CLIQUES is set */
    dest = 2;
    origin_shm = 0;
    origin_am = 1;

    if (rank != dest) {
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &orig_buf);
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &result_buf);
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &compare_buf);
    }

    MPI_Win_allocate(sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &target_buf, &win);

    for (k = 0; k < LOOP_SIZE; k++) {

        /* init buffers */
        if (rank == origin_shm) {
            orig_buf[0] = 1;
            compare_buf[0] = 0;
            result_buf[0] = 0;
        }
        else if (rank == origin_am) {
            orig_buf[0] = 0;
            compare_buf[0] = 1;
            result_buf[0] = 0;
        }
        else {
            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
            target_buf[0] = 0;
            MPI_Win_unlock(rank, win);
        }

        MPI_Barrier(MPI_COMM_WORLD);

        /* perform FOP */
        MPI_Win_lock_all(0, win);
        if (rank != dest) {
            MPI_Compare_and_swap(orig_buf, compare_buf, result_buf, MPI_INT, dest, 0, win);
            MPI_Win_flush(dest, win);
        }
        MPI_Win_unlock_all(win);

        MPI_Barrier(MPI_COMM_WORLD);

        /* check results */
        if (rank != dest) {
            MPI_Gather(result_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD);
        }
        else {
            MPI_Alloc_mem(sizeof(int) * 3, MPI_INFO_NULL, &check_buf);
            MPI_Gather(target_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD);

            if (!(check_buf[dest] == 0 && check_buf[origin_shm] == 0 && check_buf[origin_am] == 1)
                && !(check_buf[dest] == 1 && check_buf[origin_shm] == 0 &&
                     check_buf[origin_am] == 0)) {

                printf
                    ("Wrong results: target result = %d, origin_shm result = %d, origin_am result = %d\n",
                     check_buf[dest], check_buf[origin_shm], check_buf[origin_am]);

                printf
                    ("Expected results (1): target result = 1, origin_shm result = 0, origin_am result = 0\n");
                printf
                    ("Expected results (2): target result = 0, origin_shm result = 0, origin_am result = 1\n");

                errors++;
            }

            MPI_Free_mem(check_buf);
        }
    }

    MPI_Win_free(&win);

    if (rank == origin_am || rank == origin_shm) {
        MPI_Free_mem(orig_buf);
        MPI_Free_mem(result_buf);
        MPI_Free_mem(compare_buf);
    }

  exit_test:
    if (rank == dest && errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();
    return 0;
}
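The test above races two origins on a single MPI_Compare_and_swap target; the sketch below reduces it to one origin. The target rank, the displacement 0, and the -1 "free slot" marker are assumptions for illustration.

/* Try to claim a single-int slot at the target with compare-and-swap;
 * returns 1 on success.  Assumes an open passive-target epoch on win
 * and that a free slot holds -1 (illustrative convention only). */
#include <mpi.h>

static int try_claim(int my_rank, int target, MPI_Win win)
{
    int compare = -1;                      /* expected value of a free slot */
    int prev    = -1;
    MPI_Compare_and_swap(&my_rank, &compare, &prev, MPI_INT,
                         target, 0, win);
    MPI_Win_flush(target, win);            /* make prev valid */
    return prev == compare;                /* swap took effect iff values matched */
}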
Example #28
int main(int argc, char **argv)
{
    int i, rank, nproc;
    int shm_rank, shm_nproc;
    MPI_Aint size;
    int errors = 0, all_errors = 0;
    int **bases = NULL, *my_base = NULL;
    int disp_unit;
    MPI_Win shm_win = MPI_WIN_NULL, win = MPI_WIN_NULL;
    MPI_Comm shm_comm = MPI_COMM_NULL;
    MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL;
    int dst_shm_rank, dst_world_rank;
    MPI_Info create_info = MPI_INFO_NULL;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);

    MPI_Comm_rank(shm_comm, &shm_rank);
    MPI_Comm_size(shm_comm, &shm_nproc);

    /* Platform does not support shared memory, just return. */
    if (shm_nproc < 2) {
        goto exit;
    }

    /* Specify the last process in the node as the target process */
    dst_shm_rank = shm_nproc - 1;
    MPI_Comm_group(shm_comm, &shm_group);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_translate_ranks(shm_group, 1, &dst_shm_rank, world_group, &dst_world_rank);

    bases = calloc(shm_nproc, sizeof(int *));

    /* Allocate shm window among local processes, then create a global window with
     * those shm window buffers */
    MPI_Win_allocate_shared(sizeof(int) * ELEM_PER_PROC, sizeof(int), MPI_INFO_NULL,
                            shm_comm, &my_base, &shm_win);
    if (verbose)
        printf("%d -- allocate shared: my_base = %p, absolute base\n", shm_rank, my_base);

    for (i = 0; i < shm_nproc; i++) {
        MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &bases[i]);
        if (verbose)
            printf("%d --    shared query: base[%d]=%p, size %ld, unit %d\n",
                   shm_rank, i, bases[i], size, disp_unit);
    }

#ifdef USE_INFO_ALLOC_SHM
    MPI_Info_create(&create_info);
    MPI_Info_set(create_info, "alloc_shm", "true");
#else
    create_info = MPI_INFO_NULL;
#endif

    MPI_Win_create(my_base, sizeof(int) * ELEM_PER_PROC, sizeof(int), create_info, MPI_COMM_WORLD,
                   &win);

    /* Reset data */
    for (i = 0; i < ELEM_PER_PROC; i++) {
        my_base[i] = 0;
        local_buf[i] = i + 1;
    }

    /* Do RMA through global window, then check value through shared window */
    MPI_Win_lock_all(0, win);
    MPI_Win_lock_all(0, shm_win);

    if (shm_rank == 0) {
        MPI_Put(&local_buf[0], 1, MPI_INT, dst_world_rank, 0, 1, MPI_INT, win);
        MPI_Put(&local_buf[ELEM_PER_PROC - 1], 1, MPI_INT, dst_world_rank, ELEM_PER_PROC - 1, 1,
                MPI_INT, win);
        MPI_Win_flush(dst_world_rank, win);
    }

    MPI_Win_sync(shm_win);
    MPI_Barrier(shm_comm);
    MPI_Win_sync(shm_win);

    if (bases[dst_shm_rank][0] != local_buf[0]) {
        errors++;
        printf("%d -- Got %d at rank %d index %d, expected %d\n", rank,
               bases[dst_shm_rank][0], dst_shm_rank, 0, local_buf[0]);
    }
    if (bases[dst_shm_rank][ELEM_PER_PROC - 1] != local_buf[ELEM_PER_PROC - 1]) {
        errors++;
        printf("%d -- Got %d at rank %d index %d, expected %d\n", rank,
               bases[dst_shm_rank][ELEM_PER_PROC - 1], dst_shm_rank,
               ELEM_PER_PROC - 1, local_buf[ELEM_PER_PROC - 1]);
    }

    MPI_Win_unlock_all(shm_win);
    MPI_Win_unlock_all(win);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    MPI_Win_free(&win);
    MPI_Win_free(&shm_win);

  exit:
    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    if (create_info != MPI_INFO_NULL)
        MPI_Info_free(&create_info);
    if (shm_comm != MPI_COMM_NULL)
        MPI_Comm_free(&shm_comm);
    if (shm_group != MPI_GROUP_NULL)
        MPI_Group_free(&shm_group);
    if (world_group != MPI_GROUP_NULL)
        MPI_Group_free(&world_group);

    MPI_Finalize();

    if (bases)
        free(bases);

    return 0;
}
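The Win_sync / Barrier / Win_sync sequence above is what orders the remote MPI_Put against local loads through the shared-memory window; a condensed sketch, assuming an open MPI_Win_lock_all epoch on the shared window:

/* Make updates PUT by other processes before the barrier visible to local
 * loads through a shared-memory window (sketch, not MPICH test code). */
#include <mpi.h>

static void shm_make_visible(MPI_Win shm_win, MPI_Comm shm_comm)
{
    MPI_Win_sync(shm_win);    /* complete this process's own memory updates  */
    MPI_Barrier(shm_comm);    /* order against the origin's flush + barrier  */
    MPI_Win_sync(shm_win);    /* observe updates made by the other processes */
}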
Example #29
dart_ret_t dart_waitall(
  dart_handle_t * handle,
  size_t          n)
{
  int i, r_n;
  int num_handles = (int)n;
  DART_LOG_DEBUG("dart_waitall()");
  if (n == 0) {
    DART_LOG_ERROR("dart_waitall > number of handles = 0");
    return DART_OK;
  }
  if (n > INT_MAX) {
    DART_LOG_ERROR("dart_waitall ! number of handles > INT_MAX");
    return DART_ERR_INVAL;
  }
  DART_LOG_DEBUG("dart_waitall: number of handles: %d", num_handles);
  if (*handle) {
    MPI_Status  *mpi_sta;
    MPI_Request *mpi_req;
    mpi_req = (MPI_Request *) malloc(num_handles * sizeof(MPI_Request));
    mpi_sta = (MPI_Status *)  malloc(num_handles * sizeof(MPI_Status));
    /*
     * copy requests from DART handles to MPI request array:
     */
    DART_LOG_TRACE("dart_waitall: copying DART handles to MPI request array");
    r_n = 0;
    for (i = 0; i < num_handles; i++) {
      if (handle[i] != NULL) {
        DART_LOG_DEBUG("dart_waitall: -- handle[%d](%p): "
                       "dest:%d win:%"PRIu64" req:%"PRIu64"",
                       i, (void*)handle[i],
                       handle[i]->dest,
                       (uint64_t)handle[i]->win,
                       (uint64_t)handle[i]->request);
        mpi_req[r_n] = handle[i]->request;
        r_n++;
      }
    }
    /*
     * wait for communication of MPI requests:
     */
    DART_LOG_DEBUG("dart_waitall: MPI_Waitall, %d requests from %d handles",
                   r_n, num_handles);
    /* From the MPI 3.1 standard:
     *
     * The i-th entry in array_of_statuses is set to the return
     * status of the i-th operation. Active persistent requests
     * are marked inactive.
     * Requests of any other type are deallocated and the
     * corresponding handles in the array are set to
     * MPI_REQUEST_NULL.
     * The list may contain null or inactive handles.
     * The call sets to empty the status of each such entry.
     */
    if (r_n > 0) {
      if (MPI_Waitall(r_n, mpi_req, mpi_sta) == MPI_SUCCESS) {
        DART_LOG_DEBUG("dart_waitall: MPI_Waitall completed");
      } else {
        DART_LOG_ERROR("dart_waitall: MPI_Waitall failed");
        DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
        free(mpi_req);
        DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
        free(mpi_sta);
        return DART_ERR_INVAL;
      }
    } else {
      DART_LOG_DEBUG("dart_waitall > number of requests = 0");
      free(mpi_req);
      free(mpi_sta);
      return DART_OK;
    }
    /*
     * copy MPI requests back to DART handles:
     */
    DART_LOG_TRACE("dart_waitall: copying MPI requests back to DART handles");
    r_n = 0;
    for (i = 0; i < num_handles; i++) {
      if (handle[i]) {
        if (mpi_req[r_n] == MPI_REQUEST_NULL) {
          DART_LOG_TRACE("dart_waitall: -- mpi_req[%d] = MPI_REQUEST_NULL",
                         r_n);
        } else {
          DART_LOG_TRACE("dart_waitall: -- mpi_req[%d] = %d",
                         r_n, mpi_req[r_n]);
        }
        DART_LOG_TRACE("dart_waitall: -- mpi_sta[%d].MPI_SOURCE: %d",
                       r_n, mpi_sta[r_n].MPI_SOURCE);
        DART_LOG_TRACE("dart_waitall: -- mpi_sta[%d].MPI_ERROR:  %d:%s",
                       r_n,
                       mpi_sta[r_n].MPI_ERROR,
                       DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR));
        handle[i]->request = mpi_req[r_n];
        r_n++;
      }
    }
    /*
     * wait for completion of MPI requests at origins and targets:
     */
    DART_LOG_DEBUG("dart_waitall: waiting for remote completion");
    for (i = 0; i < num_handles; i++) {
      if (handle[i]) {
        if (handle[i]->request == MPI_REQUEST_NULL) {
          DART_LOG_TRACE("dart_waitall: -- handle[%d] done (MPI_REQUEST_NULL)",
                         i);
        } else {
          DART_LOG_DEBUG("dart_waitall: -- MPI_Win_flush(handle[%d]: %p))",
                         i, (void*)handle[i]);
          DART_LOG_TRACE("dart_waitall:      handle[%d]->dest: %d",
                         i, handle[i]->dest);
          DART_LOG_TRACE("dart_waitall:      handle[%d]->win:  %"PRIu64"",
                         i, (uint64_t)handle[i]->win);
          DART_LOG_TRACE("dart_waitall:      handle[%d]->req:  %"PRIu64"",
                         i, (uint64_t)handle[i]->request);
          /*
           * MPI_Win_flush to wait for remote completion:
           */
          if (MPI_Win_flush(handle[i]->dest, handle[i]->win) != MPI_SUCCESS) {
            DART_LOG_ERROR("dart_waitall: MPI_Win_flush failed");
            DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
            free(mpi_req);
            DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
            free(mpi_sta);
            return DART_ERR_INVAL;
          }
          DART_LOG_TRACE("dart_waitall: -- MPI_Request_free");
          if (MPI_Request_free(&handle[i]->request) != MPI_SUCCESS) {
            DART_LOG_ERROR("dart_waitall: MPI_Request_free failed");
            DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
            free(mpi_req);
            DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
            free(mpi_sta);
            return DART_ERR_INVAL;
          }
        }
      }
    }
    /*
     * free memory:
     */
    DART_LOG_DEBUG("dart_waitall: free handles");
    for (i = 0; i < num_handles; i++) {
      if (handle[i]) {
        /* Free handle resource */
        DART_LOG_TRACE("dart_waitall: -- free handle[%d]: %p",
                       i, (void*)(handle[i]));
        free(handle[i]);
        handle[i] = NULL;
      }
    }
    DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
    free(mpi_req);
    DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
    free(mpi_sta);
  }
  DART_LOG_DEBUG("dart_waitall > finished");
  return DART_OK;
}
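dart_waitall() combines local completion (MPI_Waitall on the stored requests) with remote completion (MPI_Win_flush per target); the same pattern reduced to plain MPI, with buffers, counts and the open lock epoch assumed:

/* Sketch of request-based RMA completion (not DART code): MPI_Waitall
 * completes the operations locally, MPI_Win_flush completes them at the
 * target.  Assumes an open passive-target epoch on win. */
#include <mpi.h>

void wait_all_gets(int **dst, const MPI_Aint *disp, int n, int target, MPI_Win win)
{
    MPI_Request req[n];                    /* C99 VLA, for brevity */

    for (int i = 0; i < n; i++)
        MPI_Rget(dst[i], 1, MPI_INT, target, disp[i], 1, MPI_INT, win, &req[i]);

    MPI_Waitall(n, req, MPI_STATUSES_IGNORE);  /* local completion: data arrived   */
    MPI_Win_flush(target, win);                /* remote completion (relevant for
                                                  puts and accumulates)            */
}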
Example #30
/**
 * TODO: Check if MPI_Accumulate (REPLACE) can bring better performance?
 */
dart_ret_t dart_get_blocking(
  void        * dest,
  dart_gptr_t   gptr,
  size_t        nbytes)
{
  MPI_Win     win;
  MPI_Aint    disp_s,
              disp_rel;
  dart_unit_t target_unitid_abs = gptr.unitid;
  dart_unit_t target_unitid_rel = target_unitid_abs;
  uint64_t    offset            = gptr.addr_or_offs.offset;
  int16_t     seg_id            = gptr.segid;
  uint16_t    index             = gptr.flags;

  /*
   * MPI uses offset type int, do not copy more than INT_MAX elements:
   */
  if (nbytes > INT_MAX) {
    DART_LOG_ERROR("dart_get_blocking ! failed: nbytes > INT_MAX");
    return DART_ERR_INVAL;
  }
  if (seg_id) {
    unit_g2l(index, target_unitid_abs, &target_unitid_rel);
  }

  DART_LOG_DEBUG("dart_get_blocking() uid_abs:%d uid_rel:%d "
                 "o:%"PRIu64" s:%d i:%u, nbytes:%zu",
                 target_unitid_abs, target_unitid_rel,
                 offset, seg_id, index, nbytes);

#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
  DART_LOG_DEBUG("dart_get_blocking: shared windows enabled");
  if (seg_id >= 0) {
    int    i;
    char * baseptr;
    /*
     * Use memcpy if the target is in the same node as the calling unit:
     * The value of i will be the target's relative ID in teamid.
     */
    i = dart_sharedmem_table[index][gptr.unitid];
    if (i >= 0) {
      DART_LOG_DEBUG("dart_get_blocking: shared memory segment, seg_id:%d",
                     seg_id);
      if (seg_id) {
        if (dart_adapt_transtable_get_baseptr(seg_id, i, &baseptr) == -1) {
          DART_LOG_ERROR("dart_get_blocking ! "
                         "dart_adapt_transtable_get_baseptr failed");
          return DART_ERR_INVAL;
        }
      } else {
        baseptr = dart_sharedmem_local_baseptr_set[i];
      }
      baseptr += offset;
      DART_LOG_DEBUG("dart_get_blocking: memcpy %zu bytes", nbytes);
      memcpy((char*)dest, baseptr, nbytes);
      return DART_OK;
    }
  }
#else
  DART_LOG_DEBUG("dart_get_blocking: shared windows disabled");
#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */
  /*
   * MPI shared windows disabled or target and calling unit are on different
   * nodes, use MPI_Rget:
   */
  if (seg_id) {
    if (dart_adapt_transtable_get_disp(
          seg_id,
          target_unitid_rel,
          &disp_s) == -1) {
      DART_LOG_ERROR("dart_get_blocking ! "
                     "dart_adapt_transtable_get_disp failed");
      return DART_ERR_INVAL;
    }
    win      = dart_win_lists[index];
    disp_rel = disp_s + offset;
    DART_LOG_DEBUG("dart_get_blocking:  nbytes:%zu "
                   "source (coll.): win:%"PRIu64" unit:%d offset:%"PRIu64" "
                   "-> dest: %p",
                   nbytes, (uint64_t)win, target_unitid_rel,
                   (uint64_t)disp_rel, dest);
  } else {
    win      = dart_win_local_alloc;
    disp_rel = offset;
    DART_LOG_DEBUG("dart_get_blocking:  nbytes:%zu "
                   "source (local): win:%"PRIu64" unit:%d offset:%"PRIu64" "
                   "-> dest: %p",
                   nbytes, (uint64_t)win, target_unitid_rel,
                   (uint64_t)disp_rel, dest);
  }

  /*
   * Use MPI_Get followed by MPI_Win_flush to ensure completion of the
   * transfer before returning:
   */
  DART_LOG_DEBUG("dart_get_blocking: MPI_Get");
  if (MPI_Get(dest,
              nbytes,
              MPI_BYTE,
              target_unitid_rel,
              disp_rel,
              nbytes,
              MPI_BYTE,
              win)
      != MPI_SUCCESS) {
    DART_LOG_ERROR("dart_get_blocking ! MPI_Get failed");
    return DART_ERR_INVAL;
  }
  DART_LOG_DEBUG("dart_get_blocking: MPI_Win_flush");
  if (MPI_Win_flush(target_unitid_rel, win) != MPI_SUCCESS) {
    DART_LOG_ERROR("dart_get_blocking ! MPI_Win_flush failed");
    return DART_ERR_INVAL;
  }

  DART_LOG_DEBUG("dart_get_blocking > finished");
  return DART_OK;
}
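Stripped of the shared-memory fast path and the DART bookkeeping, the blocking get above is MPI_Get followed by MPI_Win_flush; a minimal sketch, assuming an already-open passive-target epoch on win:

/* Blocking get as MPI_Get + MPI_Win_flush (sketch, not DART code).
 * Assumes an open passive-target epoch on win and nbytes <= INT_MAX. */
#include <mpi.h>

static int blocking_get(void *dest, int target, MPI_Aint disp, int nbytes,
                        MPI_Win win)
{
    if (MPI_Get(dest, nbytes, MPI_BYTE, target, disp, nbytes, MPI_BYTE, win)
        != MPI_SUCCESS)
        return -1;
    if (MPI_Win_flush(target, win) != MPI_SUCCESS)   /* dest is valid after this */
        return -1;
    return 0;
}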