Code Example #1
File: strided.c  Project: abhinavvishnu/matex
/** Translate a strided operation into a more general IO Vector iterator.
  *
  * @param[in] src_ptr         Source starting address of the data block to put.
  * @param[in] src_stride_ar   Source array of stride distances in bytes.
  * @param[in] dst_ptr         Destination starting address to put data.
  * @param[in] dst_stride_ar   Destination array of stride distances in bytes.
  * @param[in] count           Block size in each dimension. count[0] should be the
  *                            number of bytes of contiguous data in leading dimension.
  * @param[in] stride_levels   The number of stride levels.
  *
  * @return                    ARMCI IOV iterator corresponding to the strided parameters.
  */
armcii_iov_iter_t *ARMCII_Strided_to_iov_iter(
               void *src_ptr, int src_stride_ar[/*stride_levels*/],
               void *dst_ptr, int dst_stride_ar[/*stride_levels*/], 
               int count[/*stride_levels+1*/], int stride_levels) {

  int i;
  armcii_iov_iter_t *it = malloc(sizeof(armcii_iov_iter_t));

  ARMCII_Assert(it != NULL);

  it->src = src_ptr;
  it->dst = dst_ptr;
  it->stride_levels = stride_levels;
  it->base_ptr      = malloc(sizeof(int)*(4*stride_levels+1));
  it->was_contiguous= 0;

  ARMCII_Assert( it->base_ptr != NULL );

  it->src_stride_ar = &it->base_ptr[0*stride_levels];
  it->dst_stride_ar = &it->base_ptr[1*stride_levels];
  it->count         = &it->base_ptr[2*stride_levels];
  it->idx           = &it->base_ptr[3*stride_levels+1];

  for (i = 0; i < stride_levels; i++) {
    it->src_stride_ar[i] = src_stride_ar[i];
    it->dst_stride_ar[i] = dst_stride_ar[i];
    it->count[i]         = count[i];
    it->idx[i]           = 0;
  }

  /* count has stride_levels+1 entries; copy the trailing element explicitly */
  it->count[stride_levels] = count[stride_levels];

  return it;
}
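For reference, the strided arguments above follow the usual ARMCI convention: count[0] is the number of contiguous bytes in the leading dimension, count[1..stride_levels] are block counts, and the stride arrays give byte distances between consecutive blocks. The standalone snippet below is only an illustration (the matrix shapes are hypothetical, not from the project) of how a caller would describe a 4-row by 8-double patch of a 100x200 row-major matrix of doubles:

/* Illustrative argument setup for a 2-D strided transfer (hypothetical shapes). */
#include <stdio.h>

int main(void) {
  static double src_matrix[100][200];                    /* source matrix              */
  static double dst_patch [4][8];                        /* private destination patch  */

  int src_stride_ar[1] = { 200 * (int)sizeof(double) };  /* bytes between source rows  */
  int dst_stride_ar[1] = {   8 * (int)sizeof(double) };  /* bytes between patch rows   */
  int count[2]         = {   8 * (int)sizeof(double),    /* count[0]: bytes per row    */
                             4 };                        /* count[1]: number of rows   */
  int stride_levels    = 1;

  /* src_ptr would be &src_matrix[0][0] and dst_ptr would be &dst_patch[0][0]. */
  printf("%d contiguous bytes per row, %d rows, %d stride level(s)\n",
         count[0], count[1], stride_levels);
  (void)src_matrix; (void)dst_patch; (void)src_stride_ar; (void)dst_stride_ar;
  return 0;
}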
Code Example #2
/** Create a mutex group.  Collective.
  *
  * @param[in] count  Number of mutexes to create on the calling process
  * @param[in] pgroup ARMCI group on which to create the mutexes
  * @return           Handle to the mutex group
  */
armcix_mutex_hdl_t ARMCIX_Create_mutexes_hdl(int count, ARMCI_Group *pgroup) {
  int         ierr, i;
  armcix_mutex_hdl_t hdl;

  hdl = malloc(sizeof(struct armcix_mutex_hdl_s));
  ARMCII_Assert(hdl != NULL);

  MPI_Comm_dup(pgroup->comm, &hdl->comm);

  if (count > 0) {
    MPI_Alloc_mem(count*sizeof(long), MPI_INFO_NULL, &hdl->base);
    ARMCII_Assert(hdl->base != NULL);
  } else {
    hdl->base = NULL;
  }

  hdl->count = count;

  // Initialize mutexes to 0
  for (i = 0; i < count; i++)
    hdl->base[i] = 0;

  ierr = MPI_Win_create(hdl->base, count*sizeof(long), sizeof(long) /* displacement size */,
                        MPI_INFO_NULL, hdl->comm, &hdl->window);
  ARMCII_Assert(ierr == MPI_SUCCESS);

  return hdl;
}
Code Example #3
File: gmr.c  Project: abhinavvishnu/matex
/** One-sided accumulate operation with typed arguments.  Source buffer must be private.
  *
  * @param[in] mreg      Memory region
  * @param[in] src       Address of source data
  * @param[in] src_count Number of elements of the given type at the source
  * @param[in] src_type  MPI datatype of the source elements
  * @param[in] dst       Address of destination buffer
  * @param[in] dst_count Number of elements of the given type at the destination
  * @param[in] dst_type  MPI datatype of the destination elements
  * @param[in] proc      Absolute process id of target process
  * @return              0 on success, non-zero on failure
  */
int gmr_accumulate_typed(gmr_t *mreg, void *src, int src_count, MPI_Datatype src_type,
    void *dst, int dst_count, MPI_Datatype dst_type, int proc) {

  int        grp_proc;
  gmr_size_t disp;
  MPI_Aint lb, extent;

  grp_proc = ARMCII_Translate_absolute_to_group(&mreg->group, proc);
  ARMCII_Assert(grp_proc >= 0);

  // Calculate displacement from beginning of the window
  if (dst == MPI_BOTTOM) 
    disp = 0;
  else
    disp = (gmr_size_t) ((uint8_t*)dst - (uint8_t*)mreg->slices[proc].base);

  // Perform checks
  MPI_Type_get_true_extent(dst_type, &lb, &extent);
  ARMCII_Assert(mreg->lock_state != GMR_LOCK_UNLOCKED);
  ARMCII_Assert_msg(disp >= 0 && disp < mreg->slices[proc].size, "Invalid remote address");
  ARMCII_Assert_msg(disp + dst_count*extent <= mreg->slices[proc].size, "Transfer is out of range");

  MPI_Accumulate(src, src_count, src_type, grp_proc, (MPI_Aint) disp, dst_count, dst_type, MPI_SUM, mreg->window);

  return 0;
}
Code Example #4
File: strided.c  Project: abhinavvishnu/matex
/** Convert an ARMCI strided access description into an MPI subarray datatype.
  *
  * @param[in]  stride_array    Array of strides
  * @param[in]  count           Array of transfer counts
  * @param[in]  stride_levels   Number of levels of striding
  * @param[in]  old_type        Type of the data element described by count and stride_array
  * @param[out] new_type        New MPI type for the given strided access
  */
void ARMCII_Strided_to_dtype(int stride_array[/*stride_levels*/], int count[/*stride_levels+1*/],
                             int stride_levels, MPI_Datatype old_type, MPI_Datatype *new_type)
{
  int sizes   [stride_levels+1];
  int subsizes[stride_levels+1];
  int starts  [stride_levels+1];
  int i, old_type_size;

  MPI_Type_size(old_type, &old_type_size);

  /* Eliminate counts that don't count (all 1 counts at the end) */
  for (i = stride_levels+1; i > 0 && stride_levels > 0 && count[i-1] == 1; i--)
    stride_levels--;

  /* A correct strided spec should be monotonically increasing, and stride_array[i+1]
     should be a multiple of stride_array[i]. */
  if (stride_levels > 0) {
    for (i = 1; i < stride_levels; i++)
      ARMCII_Assert(stride_array[i] >= stride_array[i-1] && stride_array[i] % stride_array[i-1] == 0);
  }

  /* Test for a contiguous transfer */
  if (stride_levels == 0) {
    int elem_count = count[0]/old_type_size;

    ARMCII_Assert(count[0] % old_type_size == 0);
    MPI_Type_contiguous(elem_count, old_type, new_type);
  }

  /* Transfer is non-contiguous */
  else {

    for (i = 0; i < stride_levels+1; i++)
      starts[i] = 0;

    sizes   [stride_levels] = stride_array[0]/old_type_size;
    subsizes[stride_levels] = count[0]/old_type_size;

    ARMCII_Assert(stride_array[0] % old_type_size == 0 && count[0] % old_type_size == 0);

    for (i = 1; i < stride_levels; i++) {
      /* Convert strides into dimensions by dividing out contributions from lower dims */
      sizes   [stride_levels-i] = stride_array[i]/stride_array[i-1];
      subsizes[stride_levels-i] = count[i];

      ARMCII_Assert_msg(stride_array[i] % stride_array[i-1] == 0, "Invalid striding");
    }

    sizes   [0] = count[stride_levels];
    subsizes[0] = count[stride_levels];

    MPI_Type_create_subarray(stride_levels+1, sizes, subsizes, starts, MPI_ORDER_C, old_type, new_type);
  }
}
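To make the datatype construction concrete, here is a small self-contained MPI program (illustrative only, not part of the project) that builds the type the routine above would produce for a one-level strided transfer with stride_array = {64}, count = {16, 4} and old_type = MPI_BYTE, i.e. a 4 x 16-byte patch cut out of a 4 x 64-byte region:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
  /* Equivalent of ARMCII_Strided_to_dtype with stride_array = {64},
   * count = {16, 4}, old_type = MPI_BYTE (old_type_size = 1):
   *   sizes    = { count[1], stride_array[0] } = { 4, 64 }
   *   subsizes = { count[1], count[0]        } = { 4, 16 }
   *   starts   = { 0, 0 }                                          */
  int sizes[2]    = { 4, 64 };
  int subsizes[2] = { 4, 16 };
  int starts[2]   = { 0, 0 };
  int type_size;
  MPI_Datatype patch_type;

  MPI_Init(&argc, &argv);

  MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_BYTE, &patch_type);
  MPI_Type_commit(&patch_type);

  MPI_Type_size(patch_type, &type_size);
  printf("subarray type carries %d bytes (expected 4*16 = 64)\n", type_size);

  MPI_Type_free(&patch_type);
  MPI_Finalize();
  return 0;
}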
Code Example #5
File: buffer.c  Project: abhinavvishnu/matex
/** Prepare a set of buffers for use with an accumulate operation.  The
  * returned set of buffers is guaranteed to be in private space and scaled.
  * Copies will be made if needed, the result should be completed by finish.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs  Pointer to the set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  * @param[in]  datatype  The type of the buffer.
  * @param[in]  scale     Scaling constant to apply to each buffer.
  * @return               Number of buffers that were moved.
  */
int ARMCII_Buf_prepare_acc_vec(void **orig_bufs, void ***new_bufs_ptr, int count, int size,
                            int datatype, void *scale) {

  void **new_bufs;
  int i, scaled, num_moved = 0;
  
  new_bufs = malloc(count*sizeof(void*));
  ARMCII_Assert(new_bufs != NULL);

  scaled = ARMCII_Buf_acc_is_scaled(datatype, scale);

  for (i = 0; i < count; i++) {
    gmr_t *mreg = NULL;

    // Check if the source buffer is within a shared region.
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD)
      mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);

    if (scaled) {
      MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]);
      ARMCII_Assert(new_bufs[i] != NULL);

      // Lock if needed so we can directly access the buffer
      if (mreg != NULL)
        gmr_dla_lock(mreg);

      ARMCII_Buf_acc_scale(orig_bufs[i], new_bufs[i], size, datatype, scale);

      if (mreg != NULL)
        gmr_dla_unlock(mreg);
    } else {
      new_bufs[i] = orig_bufs[i];
    }

    if (mreg != NULL) {
      // If the buffer wasn't copied, we should copy it into a private buffer
      if (new_bufs[i] == orig_bufs[i]) {
        MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]);
        ARMCII_Assert(new_bufs[i] != NULL);

        gmr_dla_lock(mreg);
        ARMCI_Copy(orig_bufs[i], new_bufs[i], size);
        gmr_dla_unlock(mreg);
      }
    }

    // Count buffers that were staged into private copies
    if (new_bufs[i] != orig_bufs[i])
      num_moved++;
  }

  *new_bufs_ptr = new_bufs;
  
  return num_moved;
}
Code Example #6
File: onesided.c  Project: jeffhammond/armci-mpi
/** One-sided accumulate operation.
  *
  * @param[in] datatype ARMCI data type for the accumulate operation (see armci.h)
  * @param[in] scale    Pointer for a scalar of type datatype that will be used to
  *                     scale values in the source buffer
  * @param[in] src      Source address (local)
  * @param[in] dst      Destination address (remote)
  * @param[in] bytes    Number of bytes to transfer
  * @param[in] proc     Process id to target
  * @return             0 on success, non-zero on failure
  */
int PARMCI_Acc(int datatype, void *scale, void *src, void *dst, int bytes, int proc) {
  void  *src_buf;
  int    count, type_size, scaled;
  MPI_Datatype type;
  gmr_t *src_mreg, *dst_mreg;

  /* If NOGUARD is set, assume the buffer is not shared */
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD)
    src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank);
  else
    src_mreg = NULL;

  dst_mreg = gmr_lookup(dst, proc);

  ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer");

  /* Prepare the input data: Apply scaling if needed and acquire the DLA lock if
   * needed.  We hold the DLA lock if (src_buf == src && src_mreg != NULL). */

  scaled = ARMCII_Buf_acc_is_scaled(datatype, scale);

  if (scaled) {
      MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf);
      ARMCII_Assert(src_buf != NULL);
      ARMCII_Buf_acc_scale(src, src_buf, bytes, datatype, scale);
  } else {
    src_buf = src;
  }

  /* Check if we need to copy: user requested it or same mem region */
  if (   (src_buf == src) /* buf_prepare didn't make a copy */
      && (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY || src_mreg == dst_mreg) )
  {
    MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf);
    ARMCII_Assert(src_buf != NULL);
    ARMCI_Copy(src, src_buf, bytes);
  }

  ARMCII_Acc_type_translate(datatype, &type, &type_size);
  count = bytes/type_size;

  ARMCII_Assert_msg(bytes % type_size == 0, 
      "Transfer size is not a multiple of the datatype size");

  /* TODO: Support a local accumulate operation more efficiently */

  gmr_accumulate(dst_mreg, src_buf, dst, count, type, proc);
  gmr_flush(dst_mreg, proc, 1); /* flush_local */

  if (src_buf != src)
    MPI_Free_mem(src_buf);

  return 0;
}
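For context, applications normally reach this routine through the public ARMCI_Acc entry point. The following is a minimal sketch (illustrative only; it assumes the standard ARMCI API and omits error handling) that accumulates a scaled vector of doubles into the right-hand neighbour's allocation:

#include <armci.h>
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv) {
  const int N = 8;
  int rank, nproc, next, i;
  void **base;                       /* one base pointer per process */
  double scale = 2.0, local[8];

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  base = malloc(nproc * sizeof(void*));
  ARMCI_Malloc(base, N * sizeof(double));               /* collective */

  for (i = 0; i < N; i++) {
    ((double*)base[rank])[i] = 0.0;                     /* zero my slice      */
    local[i] = (double)i;                               /* local contribution */
  }
  ARMCI_Barrier();

  next = (rank + 1) % nproc;
  /* dst[i] += scale * local[i] on the target process */
  ARMCI_Acc(ARMCI_ACC_DBL, &scale, local, base[next], N * (int)sizeof(double), next);

  ARMCI_AllFence();
  ARMCI_Barrier();

  ARMCI_Free(base[rank]);
  free(base);
  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}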
Code Example #7
File: mutex_hdl_queue.c  Project: abhinavvishnu/matex
/** Lock a mutex.
  * 
  * @param[in] hdl        Mutex group that the mutex belongs to.
  * @param[in] mutex      Desired mutex number [0..count-1]
  * @param[in] world_proc Absolute ID of process where the mutex lives
  */
void ARMCIX_Lock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc) {
  int       rank, nproc, already_locked, i, proc;
  uint8_t *buf;

  ARMCII_Assert(mutex >= 0 && mutex < hdl->max_count);

  MPI_Comm_rank(hdl->grp.comm, &rank);
  MPI_Comm_size(hdl->grp.comm, &nproc);

  /* User gives us the absolute ID.  Translate to the rank in the mutex's group. */
  proc = ARMCII_Translate_absolute_to_group(&hdl->grp, world_proc);
  ARMCII_Assert(proc >= 0);

  buf = malloc(nproc*sizeof(uint8_t));
  ARMCII_Assert(buf != NULL);

  buf[rank] = 1;

  /* Get all data from the lock_buf, except the byte belonging to
   * me. Set the byte belonging to me to 1. */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]);
  
  MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]);

  /* Get data to the left of rank */
  if (rank > 0) {
    MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]);
  }

  /* Get data to the right of rank */
  if (rank < nproc - 1) {
    MPI_Get(&buf[rank+1], nproc-1-rank, MPI_BYTE, proc, rank + 1, nproc-1-rank, MPI_BYTE, hdl->windows[mutex]);
  }
  
  MPI_Win_unlock(proc, hdl->windows[mutex]);

  ARMCII_Assert(buf[rank] == 1);

  for (i = already_locked = 0; i < nproc; i++)
    if (buf[i] && i != rank)
      already_locked = 1;

  /* Wait for notification */
  if (already_locked) {
    MPI_Status status;
    ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "waiting for notification [proc = %d, mutex = %d]\n", proc, mutex);
    MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, ARMCI_MUTEX_TAG+mutex, hdl->grp.comm, &status);
  }

  ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "lock acquired [proc = %d, mutex = %d]\n", proc, mutex);
  free(buf);
}
Code Example #8
/** Lock a mutex.
  * 
  * @param[in] hdl         Mutex group that the mutex belongs to.
  * @param[in] mutex       Desired mutex number [0..count-1]
  * @param[in] world_proc  Absolute ID of process where the mutex lives
  */
void ARMCIX_Lock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc) {
  int       rank, nproc, proc;
  long      lock_val, unlock_val, lock_out;
  int       timeout = 1;

  MPI_Comm_rank(hdl->comm, &rank);
  MPI_Comm_size(hdl->comm, &nproc);

  /* User gives us the absolute ID.  Translate to the rank in the mutex's group. */
  proc = ARMCII_Translate_absolute_to_group(hdl->comm, world_proc);
  ARMCII_Assert(proc >= 0);

  lock_val   = rank+1;    // Map into range 1..nproc
  unlock_val = -1 * (rank+1);

  /* mutex <- mutex + rank */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
  MPI_Accumulate(&lock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
  MPI_Win_unlock(proc, hdl->window);

  for (;;) {
    /* read mutex value */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
    MPI_Get(&lock_out, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, hdl->window);
    MPI_Win_unlock(proc, hdl->window);

    ARMCII_Assert(lock_out > 0);
    ARMCII_Assert(lock_out <= nproc*(nproc+1)/2); // Can be at most the sum of all lock values (rank+1)

    /* We are holding the mutex */
    if (lock_out == rank+1)
      break;

    /* mutex <- mutex - rank */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
    MPI_Accumulate(&unlock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
    MPI_Win_unlock(proc, hdl->window);

    /* Exponential backoff */
    usleep(timeout + rand()%timeout);
    timeout = MIN(timeout*TIMEOUT_MUL, MAX_TIMEOUT);
    if (rand() % nproc == 0) // Chance to reset timeout
      timeout = 1;

    /* mutex <- mutex + rank */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
    MPI_Accumulate(&lock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
    MPI_Win_unlock(proc, hdl->window);
  }
}
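Applications usually reach these handles through the public ARMCI mutex calls rather than the ARMCIX_* layer. A minimal usage sketch (illustrative; assumes the standard ARMCI mutex API) that serializes a critical section on mutex 0 hosted by process 0:

#include <armci.h>
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
  int rank;

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  ARMCI_Create_mutexes(1);      /* collective: one mutex hosted on each process */

  ARMCI_Lock(0, 0);             /* acquire mutex 0 on process 0 */
  printf("process %d is in the critical section\n", rank);
  ARMCI_Unlock(0, 0);           /* release it */

  ARMCI_Destroy_mutexes();      /* collective */

  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}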
Code Example #9
File: mutex_hdl_queue.c  Project: abhinavvishnu/matex
/** Unlock a mutex.
  * 
  * @param[in] hdl   Mutex group that the mutex belongs to.
  * @param[in] mutex Desired mutex number [0..count-1]
  * @param[in] world_proc Absolute ID of process where the mutex lives
  */
void ARMCIX_Unlock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc) {
  int      rank, nproc, i, proc;
  uint8_t *buf;

  ARMCII_Assert(mutex >= 0 && mutex < hdl->max_count);

  MPI_Comm_rank(hdl->grp.comm, &rank);
  MPI_Comm_size(hdl->grp.comm, &nproc);

  proc = ARMCII_Translate_absolute_to_group(&hdl->grp, world_proc);
  ARMCII_Assert(proc >= 0);

  buf = malloc(nproc*sizeof(uint8_t));
  ARMCII_Assert(buf != NULL);

  buf[rank] = 0;

  /* Get all data from the lock_buf, except the byte belonging to
   * me. Set the byte belonging to me to 0. */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]);
  
  MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]);

  /* Get data to the left of rank */
  if (rank > 0) {
    MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]);
  }

  /* Get data to the right of rank */
  if (rank < nproc - 1) {
    MPI_Get(&buf[rank+1], nproc-1-rank, MPI_BYTE, proc, rank + 1, nproc-1-rank, MPI_BYTE, hdl->windows[mutex]);
  }
  
  MPI_Win_unlock(proc, hdl->windows[mutex]);

  ARMCII_Assert(buf[rank] == 0);

  /* Notify the next waiting process, starting to my right for fairness */
  for (i = 1; i < nproc; i++) {
    int p = (rank + i) % nproc;
    if (buf[p] == 1) {
      ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "notifying %d [proc = %d, mutex = %d]\n", p, proc, mutex);
      MPI_Send(NULL, 0, MPI_BYTE, p, ARMCI_MUTEX_TAG+mutex, hdl->grp.comm);
      break;
    }
  }

  ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "lock released [proc = %d, mutex = %d]\n", proc, mutex);
  free(buf);
}
Code Example #10
File: gmr.c  Project: abhinavvishnu/matex
/** Unlock a memory region that was locked for direct local access.
  *
  * @param[in] mreg     Memory region
  */
void gmr_dla_unlock(gmr_t *mreg) {
  int grp_proc = ARMCII_Translate_absolute_to_group(&mreg->group, ARMCI_GROUP_WORLD.rank);

  ARMCII_Assert(grp_proc >= 0);
  ARMCII_Assert(mreg->lock_state == GMR_LOCK_DLA);
  ARMCII_Assert_msg((mreg->access_mode & ARMCIX_MODE_NO_LOAD_STORE) == 0,
      "Direct local access is not allowed in the current access mode");

  mreg->dla_lock_count--;

  if (mreg->dla_lock_count == 0) {
    MPI_Win_unlock(grp_proc, mreg->window);
    mreg->lock_state = GMR_LOCK_UNLOCKED;
  }
}
Code Example #11
File: buffer.c  Project: abhinavvishnu/matex
/** Prepare a set of buffers for use with a get operation.  The returned set of
  * buffers is guaranteed to be in private space.  Copies will be made if needed,
  * the result should be completed by finish.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs  Pointer to the set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  * @return               Number of buffers that were moved.
  */
int ARMCII_Buf_prepare_write_vec(void **orig_bufs, void ***new_bufs_ptr, int count, int size) {
  int num_moved = 0;

  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    void **new_bufs = malloc(count*sizeof(void*));
    int i;

    for (i = 0; i < count; i++)
      new_bufs[i] = NULL;

    for (i = 0; i < count; i++) {
      // Check if the destination buffer is within a shared region.  If so, create
      // a temporary private buffer to hold the result.
      gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);

      if (mreg != NULL) {
        MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]);
        ARMCII_Assert(new_bufs[i] != NULL);
        num_moved++;
      } else {
        new_bufs[i] = orig_bufs[i];
      }
    }

    *new_bufs_ptr = new_bufs;
  } else {
    *new_bufs_ptr = orig_bufs;
  }
  
  return num_moved;
}
Code Example #12
File: groups.c  Project: jeffhammond/armci-mpi
/** Translate a group process rank to the corresponding process rank in the
  * ARMCI world group.
  *
  * @param[in] group      Group to translate from.
  * @param[in] group_rank Rank of the process in group.
  * @return               Corresponding rank in the ARMCI world group, or -1 if the
  *                       translation failed.
  */
int ARMCI_Absolute_id(ARMCI_Group *group, int group_rank) {
  int       world_rank;
  MPI_Group world_group, sub_group;

  ARMCII_Assert(group_rank >= 0 && group_rank < group->size);

  /* Check if group is the world group */
  if (group->comm == ARMCI_GROUP_WORLD.comm)
    world_rank = group_rank;

  /* Check for translation cache */
  else if (group->grp_to_abs != NULL)
    world_rank = group->grp_to_abs[group_rank];

  else {
    /* Translate the rank */
    MPI_Comm_group(ARMCI_GROUP_WORLD.comm, &world_group);
    MPI_Comm_group(group->comm, &sub_group);

    MPI_Group_translate_ranks(sub_group, 1, &group_rank, world_group, &world_rank);

    MPI_Group_free(&world_group);
    MPI_Group_free(&sub_group);
  }

  /* Check if translation failed */
  if (world_rank == MPI_UNDEFINED)
    return -1;
  else
    return world_rank;
}
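A short usage sketch (illustrative only; assumes ARMCI_Group_create is called collectively by all world processes): build a subgroup of the even world ranks and translate its rank 0 back to a world rank.

#include <armci.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
  int world_rank, world_size, i, nmembers = 0;
  int *members;
  ARMCI_Group even_group;

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);

  members = malloc(world_size * sizeof(int));
  for (i = 0; i < world_size; i += 2)
    members[nmembers++] = i;                            /* even world ranks */

  ARMCI_Group_create(nmembers, members, &even_group);   /* collective */

  if (world_rank % 2 == 0)
    printf("group rank 0 corresponds to world rank %d\n",
           ARMCI_Absolute_id(&even_group, 0));

  free(members);
  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}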
Code Example #13
File: message.c  Project: jeffhammond/armci-mpi
/** Broadcast on a group. Collective.
  *
  * @param[in]    scope ARMCI scope
  * @param[inout] buf_in Input on the root, output on all other processes
  * @param[in]    len   Number of bytes in the message
  * @param[in]    abs_root Absolute rank of the process at the root of the broadcast
  * @param[in]    group ARMCI group on which to perform communication
  */
void armci_msg_group_bcast_scope(int scope, void *buf_in, int len, int abs_root, ARMCI_Group *group) {
  int    grp_root;
  void **buf;

  if (scope == SCOPE_ALL || scope == SCOPE_MASTERS) {
    /* Is the buffer an input or an output? */
    if (ARMCI_GROUP_WORLD.rank == abs_root)
      ARMCII_Buf_prepare_read_vec(&buf_in, &buf, 1, len);
    else
      ARMCII_Buf_prepare_write_vec(&buf_in, &buf, 1, len);

    grp_root = ARMCII_Translate_absolute_to_group(group, abs_root);
    ARMCII_Assert(grp_root >= 0 && grp_root < group->size);

    MPI_Bcast(buf[0], len, MPI_BYTE, grp_root, group->comm);

    if (ARMCI_GROUP_WORLD.rank == abs_root)
      ARMCII_Buf_finish_read_vec(&buf_in, buf, 1, len);
    else
      ARMCII_Buf_finish_write_vec(&buf_in, buf, 1, len);
  } else /* SCOPE_NODE */ {
    grp_root = 0;

    /* This is a self-broadcast, which is a no-op. */
  }
}
Code Example #14
File: malloc.c  Project: jeffhammond/armci-mpi
/** Allocate a shared memory segment.  Collective.
  *
  * @param[out] base_ptrs Array that will contain pointers to the base address of
  *                       each process' patch of the segment.  Array is of length
  *                       equal to the number of processes in the group.
  * @param[in]       size Number of bytes to allocate on the local process.
  * @param[in]      group Group of processes participating in the allocation.
  * @return               Zero on success, non-zero otherwise.
  */
int ARMCI_Malloc_group(void **base_ptrs, armci_size_t size, ARMCI_Group *group) {
  int i;
  gmr_t *mreg;

  ARMCII_Assert(PARMCI_Initialized());

  mreg = gmr_create(size, base_ptrs, group);

  if (DEBUG_CAT_ENABLED(DEBUG_CAT_ALLOC)) {
#define BUF_LEN 1000
    char ptr_string[BUF_LEN];
    int  count = 0;

    if (mreg == NULL) {
      strncpy(ptr_string, "NULL", 5);
    } else {
      for (i = 0; i < mreg->nslices && count < BUF_LEN; i++)
        count += snprintf(ptr_string+count, BUF_LEN-count, 
            (i == mreg->nslices-1) ? "%p" : "%p ", base_ptrs[i]);
    }

    ARMCII_Dbg_print(DEBUG_CAT_ALLOC, "base ptrs [%s]\n", ptr_string);
#undef BUF_LEN
  }

  return 0;
}
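A usage sketch (illustrative; minimal error handling) in which every member of the world group allocates a slice and receives all peers' base pointers:

#include <armci.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
  int rank, nproc;
  void **base_ptrs;
  ARMCI_Group world;

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  ARMCI_Group_get_world(&world);

  /* One slot per group member, filled in collectively by ARMCI_Malloc_group */
  base_ptrs = malloc(nproc * sizeof(void*));
  ARMCI_Malloc_group(base_ptrs, 1024, &world);

  printf("process %d: my slice is at %p, rank 0's slice is at %p\n",
         rank, base_ptrs[rank], base_ptrs[0]);

  ARMCI_Free_group(base_ptrs[rank], &world);
  free(base_ptrs);
  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}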
Code Example #15
File: mutex_hdl_queue.c  Project: abhinavvishnu/matex
/** Create a group of ARMCI mutexes.  Collective on the ARMCI group.
  *
  * @param[in] my_count  Number of mutexes on the local process.
  * @param[in] pgroup ARMCI group on which to create mutexes
  * @return           Handle to the mutex group.
  */
armcix_mutex_hdl_t ARMCIX_Create_mutexes_hdl(int my_count, ARMCI_Group *pgroup) {
  int rank, nproc, max_count, i;
  armcix_mutex_hdl_t hdl;

  hdl = malloc(sizeof(struct armcix_mutex_hdl_s));
  ARMCII_Assert(hdl != NULL);

  ARMCIX_Group_dup(pgroup, &hdl->grp);

  MPI_Comm_rank(hdl->grp.comm, &rank);
  MPI_Comm_size(hdl->grp.comm, &nproc);

  hdl->my_count = my_count;

  /* Find the max. count to determine how many windows we need. */
  MPI_Allreduce(&my_count, &max_count, 1, MPI_INT, MPI_MAX, hdl->grp.comm);
  ARMCII_Assert_msg(max_count > 0, "Invalid number of mutexes");

  hdl->max_count = max_count;
  hdl->windows = malloc(sizeof(MPI_Win)*max_count);

  if (my_count > 0) {
    hdl->bases = malloc(sizeof(uint8_t*)*my_count);
  } else {
    hdl->bases = NULL;
  }

  /* We need multiple windows here: one for each mutex.  Otherwise
     performance will suffer due to exclusive access epochs. */
  for (i = 0; i < max_count; i++) {
    int   size = 0;
    void *base = NULL;

    if (i < my_count) {
      MPI_Alloc_mem(nproc, MPI_INFO_NULL, &hdl->bases[i]);
      ARMCII_Assert(hdl->bases[i] != NULL);
      ARMCII_Bzero(hdl->bases[i], nproc);

      base = hdl->bases[i];
      size = nproc;
    }

    MPI_Win_create(base, size, sizeof(uint8_t), MPI_INFO_NULL, hdl->grp.comm, &hdl->windows[i]);
  }

  return hdl;
}
Code Example #16
File: gmr.c  Project: abhinavvishnu/matex
/** Lock a memory region so that one-sided operations can be performed.
  *
  * @param[in] mreg     Memory region
  * @param[in] proc     Absolute process id of the target
  */
void gmr_lock(gmr_t *mreg, int proc) {
  int grp_proc = ARMCII_Translate_absolute_to_group(&mreg->group, proc);
  int grp_me   = ARMCII_Translate_absolute_to_group(&mreg->group, ARMCI_GROUP_WORLD.rank);
  int lock_assert, lock_mode;

  ARMCII_Assert(grp_proc >= 0 && grp_me >= 0);
  ARMCII_Assert(mreg->lock_state == GMR_LOCK_UNLOCKED || mreg->lock_state == GMR_LOCK_DLA);

  /* Check for active DLA and suspend if needed */
  if (mreg->lock_state == GMR_LOCK_DLA) {
    ARMCII_Assert(grp_me == mreg->lock_target);
    MPI_Win_unlock(mreg->lock_target, mreg->window);
    mreg->lock_state = GMR_LOCK_DLA_SUSP;
  }

  if (   mreg->access_mode & ARMCIX_MODE_CONFLICT_FREE 
      && mreg->access_mode & ARMCIX_MODE_NO_LOAD_STORE )
  {
    /* Only non-conflicting RMA accesses allowed.
       Shared and exclusive locks. */
    lock_assert = MPI_MODE_NOCHECK;
    lock_mode   = MPI_LOCK_SHARED;
  } else if (mreg->access_mode & ARMCIX_MODE_CONFLICT_FREE) {
    /* Non-conflicting RMA and local accesses allowed.
       Shared and exclusive locks. */
    lock_assert = 0;
    lock_mode   = MPI_LOCK_SHARED;
  } else {
    /* Conflicting RMA and local accesses allowed.
       Exclusive locks. */
    lock_assert = 0;
    lock_mode   = MPI_LOCK_EXCLUSIVE;
  }

  MPI_Win_lock(lock_mode, grp_proc, lock_assert, mreg->window);

  if (lock_mode == MPI_LOCK_EXCLUSIVE)
    mreg->lock_state = GMR_LOCK_EXCLUSIVE;
  else
    mreg->lock_state = GMR_LOCK_SHARED;

  mreg->lock_target = grp_proc;
}
Code Example #17
File: groups.c  Project: jeffhammond/armci-mpi
/** Initialize an ARMCI group's remaining fields using the communicator field.
  */
void ARMCII_Group_init_from_comm(ARMCI_Group *group) {
  if (group->comm != MPI_COMM_NULL) {
    MPI_Comm_size(group->comm, &group->size);
    MPI_Comm_rank(group->comm, &group->rank);

  } else {
    group->rank = -1;
    group->size =  0;
  }

  /* If noncollective groups are in use, create a separate communicator that
    can be used for noncollective group creation with this group as the parent.
    This ensures that calls to MPI_Intercomm_create can't clash with any user
    communication. */

  if (ARMCII_GLOBAL_STATE.noncollective_groups && group->comm != MPI_COMM_NULL)
    MPI_Comm_dup(group->comm, &group->noncoll_pgroup_comm);
  else
    group->noncoll_pgroup_comm = MPI_COMM_NULL;

  /* Check if translation caching is enabled */
  if (ARMCII_GLOBAL_STATE.cache_rank_translation) {
    if (group->comm != MPI_COMM_NULL) {
      int      *ranks, i;
      MPI_Group world_group, sub_group;

      group->abs_to_grp = malloc(sizeof(int)*ARMCI_GROUP_WORLD.size);
      group->grp_to_abs = malloc(sizeof(int)*group->size);
      ranks = malloc(sizeof(int)*ARMCI_GROUP_WORLD.size);

      ARMCII_Assert(group->abs_to_grp != NULL && group->grp_to_abs != NULL && ranks != NULL);

      for (i = 0; i < ARMCI_GROUP_WORLD.size; i++)
        ranks[i] = i;

      MPI_Comm_group(ARMCI_GROUP_WORLD.comm, &world_group);
      MPI_Comm_group(group->comm, &sub_group);

      MPI_Group_translate_ranks(sub_group, group->size, ranks, world_group, group->grp_to_abs);
      MPI_Group_translate_ranks(world_group, ARMCI_GROUP_WORLD.size, ranks, sub_group, group->abs_to_grp);

      MPI_Group_free(&world_group);
      MPI_Group_free(&sub_group);

      free(ranks);
    }
  }
  
  /* Translation caching is disabled */
  else {
    group->abs_to_grp = NULL;
    group->grp_to_abs = NULL;
  }
}
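The translation cache built above is a batched form of MPI_Group_translate_ranks. The standalone sketch below (illustrative only) translates one rank between a sub-communicator and MPI_COMM_WORLD:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
  int world_rank, sub_rank = 0, translated;
  MPI_Comm  sub_comm;
  MPI_Group world_group, sub_group;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

  /* Split the world into even/odd sub-communicators */
  MPI_Comm_split(MPI_COMM_WORLD, world_rank % 2, world_rank, &sub_comm);

  MPI_Comm_group(MPI_COMM_WORLD, &world_group);
  MPI_Comm_group(sub_comm, &sub_group);

  /* Which world rank is rank 0 of my sub-communicator? */
  MPI_Group_translate_ranks(sub_group, 1, &sub_rank, world_group, &translated);
  printf("world rank %d: sub-communicator rank 0 is world rank %d\n", world_rank, translated);

  MPI_Group_free(&world_group);
  MPI_Group_free(&sub_group);
  MPI_Comm_free(&sub_comm);
  MPI_Finalize();
  return 0;
}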
Code Example #18
/** Unlock a mutex.
  * 
  * @param[in] hdl         Mutex group that the mutex belongs to.
  * @param[in] mutex       Desired mutex number [0..count-1]
  * @param[in] world_proc  Absolute ID of process where the mutex lives
  */
void ARMCIX_Unlock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc) {
  int       rank, nproc, proc;
  long      unlock_val;

  ARMCII_Assert(mutex >= 0);

  MPI_Comm_rank(hdl->comm, &rank);
  MPI_Comm_size(hdl->comm, &nproc);

  /* User gives us the absolute ID.  Translate to the rank in the mutex's group. */
  proc = ARMCII_Translate_absolute_to_group(hdl->comm, world_proc);
  ARMCII_Assert(proc >= 0);

  unlock_val = -1 * (rank+1);

  /* mutex <- mutex - rank */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
  MPI_Accumulate(&unlock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
  MPI_Win_unlock(proc, hdl->window);
}
Code Example #19
/** Attempt to lock a mutex (non-blocking).
  * 
  * @param[in] hdl         Mutex group that the mutex belongs to.
  * @param[in] mutex       Desired mutex number [0..count-1]
  * @param[in] world_proc  Absolute ID of process where the mutex lives
  * @return                0 on success, non-zero on failure
  */
int ARMCIX_Trylock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc) {
  int       rank, nproc, proc;
  long      lock_val, unlock_val, lock_out;

  ARMCII_Assert(mutex >= 0);

  MPI_Comm_rank(hdl->comm, &rank);
  MPI_Comm_size(hdl->comm, &nproc);

  /* User gives us the absolute ID.  Translate to the rank in the mutex's group. */
  proc = ARMCII_Translate_absolute_to_group(hdl->comm, world_proc);
  ARMCII_Assert(proc >= 0);

  lock_val   = rank+1;
  unlock_val = -1 * (rank+1);

  /* mutex <- mutex + rank */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
  MPI_Accumulate(&lock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
  MPI_Win_unlock(proc, hdl->window);

  /* read mutex value */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
  MPI_Get(&lock_out, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, hdl->window);
  MPI_Win_unlock(proc, hdl->window);

  ARMCII_Assert(lock_out > 0);
  ARMCII_Assert(lock_out <= nproc*(nproc+1)/2); // Can be at most the sum of all lock values (rank+1)

  /* We are holding the mutex */
  if (lock_out == rank+1)
    return 0;

  /* mutex <- mutex - rank */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
  MPI_Accumulate(&unlock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
  MPI_Win_unlock(proc, hdl->window);

  return 1;
}
Code Example #20
File: onesided.c  Project: addy004/mpich2-yarn
/** One-sided put operation.
  *
  * @param[in] src    Source address (local)
  * @param[in] dst    Destination address (remote)
  * @param[in] size   Number of bytes to transfer
  * @param[in] target Process id to target
  * @return           0 on success, non-zero on failure
  */
int ARMCI_Put(void *src, void *dst, int size, int target) {
  gmr_t *src_mreg, *dst_mreg;

  src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank);
  dst_mreg = gmr_lookup(dst, target);

  ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer");

  /* Local operation */
  if (target == ARMCI_GROUP_WORLD.rank) {
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
      gmr_dla_lock(dst_mreg);
      if (src_mreg) gmr_dla_lock(src_mreg);
    }

    ARMCI_Copy(src, dst, size);
    
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
      gmr_dla_unlock(dst_mreg);
      if (src_mreg) gmr_dla_unlock(src_mreg);
    }
  }

  /* Origin buffer is private */
  else if (src_mreg == NULL || ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_NOGUARD) {
    gmr_lock(dst_mreg, target);
    gmr_put(dst_mreg, src, dst, size, target);
    gmr_unlock(dst_mreg, target);
  }

  /* COPY: Either origin and target buffers are in the same window and we can't
   * lock the same window twice (MPI semantics) or the user has requested
   * always-copy mode. */
  else {
    void *src_buf;

    MPI_Alloc_mem(size, MPI_INFO_NULL, &src_buf);
    ARMCII_Assert(src_buf != NULL);

    gmr_dla_lock(src_mreg);
    ARMCI_Copy(src, src_buf, size);
    gmr_dla_unlock(src_mreg);

    gmr_lock(dst_mreg, target);
    gmr_put(dst_mreg, src_buf, dst, size, target);
    gmr_unlock(dst_mreg, target);

    MPI_Free_mem(src_buf);
  }

  return 0;
}
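A minimal put/get sketch using the public API (illustrative only; error handling omitted): each process puts a value into its right-hand neighbour's slice and then reads it back.

#include <armci.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
  int rank, nproc, next;
  void **base;
  double value, readback;

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  base = malloc(nproc * sizeof(void*));
  ARMCI_Malloc(base, sizeof(double));                      /* collective */
  ARMCI_Barrier();

  next  = (rank + 1) % nproc;
  value = 100.0 + rank;

  ARMCI_Put(&value, base[next], sizeof(double), next);     /* remote write */
  ARMCI_Fence(next);                                       /* complete the put at the target */
  ARMCI_Barrier();

  ARMCI_Get(base[next], &readback, sizeof(double), next);  /* remote read */
  printf("process %d read %.1f back from process %d\n", rank, readback, next);

  ARMCI_Free(base[rank]);
  free(base);
  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}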
Code Example #21
File: gmr.c  Project: abhinavvishnu/matex
/** Unlock a memory region.
  *
  * @param[in] mreg     Memory region
  * @param[in] proc     Absolute process id of the target
  */
void gmr_unlock(gmr_t *mreg, int proc) {
  int grp_proc = ARMCII_Translate_absolute_to_group(&mreg->group, proc);
  int grp_me   = ARMCII_Translate_absolute_to_group(&mreg->group, ARMCI_GROUP_WORLD.rank);

  ARMCII_Assert(grp_proc >= 0 && grp_me >= 0);
  ARMCII_Assert(mreg->lock_state == GMR_LOCK_EXCLUSIVE || mreg->lock_state == GMR_LOCK_SHARED);
  ARMCII_Assert(mreg->lock_target == grp_proc);

  /* Check if DLA is suspended and needs to be resumed */
  if (mreg->dla_lock_count > 0) {

    if (mreg->lock_state != GMR_LOCK_EXCLUSIVE || mreg->lock_target != grp_me) {
      MPI_Win_unlock(grp_proc, mreg->window);
      MPI_Win_lock(MPI_LOCK_EXCLUSIVE, grp_me, 0, mreg->window); // FIXME: NOCHECK here?
    }

    mreg->lock_state = GMR_LOCK_DLA;
    mreg->lock_target= grp_me;
  }
  else {
    MPI_Win_unlock(grp_proc, mreg->window);
    mreg->lock_state = GMR_LOCK_UNLOCKED;
  }
}
Code Example #22
File: message.c  Project: jeffhammond/armci-mpi
/** Collective index selection reduce operation (scoped).
  */
void armci_msg_sel_scope(int scope, void *x, int n, char* op, int type, int contribute) {
  MPI_Comm    sel_comm;
  sel_data_t *data_in, *data_out;
  void      **x_buf;

  /*
  printf("[%d] armci_msg_sel_scope(scope=%d, x=%p, n=%d, op=%s, type=%d, contribute=%d)\n",
      ARMCI_GROUP_WORLD.rank, scope, x, n, op, type, contribute);
  */

  /* Determine the scope of the collective operation */
  if (scope == SCOPE_ALL || scope == SCOPE_MASTERS)
    sel_comm = ARMCI_GROUP_WORLD.comm;
  else
    sel_comm = MPI_COMM_SELF;

  data_in  = malloc(sizeof(sel_data_t)+n-1);
  data_out = malloc(sizeof(sel_data_t)+n-1);

  ARMCII_Assert(data_in != NULL && data_out != NULL);

  ARMCII_Buf_prepare_read_vec(&x, &x_buf, 1, n);

  data_in->contribute = contribute;
  data_in->type       = type;

  if (contribute)
    ARMCI_Copy(x, data_in->data, n);

  if (strncmp(op, "min", 3) == 0) {
    MPI_Allreduce(data_in, data_out, sizeof(sel_data_t)+n-1, MPI_BYTE, ARMCI_MPI_SELMIN_OP, sel_comm);
  } else if (strncmp(op, "max", 3) == 0) {
    MPI_Allreduce(data_in, data_out, sizeof(sel_data_t)+n-1, MPI_BYTE, ARMCI_MPI_SELMAX_OP, sel_comm);
  } else {
      ARMCII_Error("Invalid operation (%s)", op);
  }

  ARMCI_Copy(data_out->data, x, n);

  ARMCII_Buf_finish_write_vec(&x, x_buf, 1, n);

  free(data_in);
  free(data_out);
}
Code Example #23
File: onesided.c  Project: jeffhammond/armci-mpi
/** One-sided get operation.
  *
  * @param[in] src    Source address (remote)
  * @param[in] dst    Destination address (local)
  * @param[in] size   Number of bytes to transfer
  * @param[in] target Process id to target
  * @return           0 on success, non-zero on failure
  */
int PARMCI_Get(void *src, void *dst, int size, int target) {
  gmr_t *src_mreg, *dst_mreg;

  src_mreg = gmr_lookup(src, target);

  /* If NOGUARD is set, assume the buffer is not shared */
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD)
    dst_mreg = gmr_lookup(dst, ARMCI_GROUP_WORLD.rank);
  else
    dst_mreg = NULL;

  ARMCII_Assert_msg(src_mreg != NULL, "Invalid remote pointer");

  /* Local operation */
  if (target == ARMCI_GROUP_WORLD.rank && dst_mreg == NULL) {
    ARMCI_Copy(src, dst, size);
  }

  /* Origin buffer is private */
  else if (dst_mreg == NULL) {
    gmr_get(src_mreg, src, dst, size, target);
    gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */
  }

  /* COPY: Either origin and target buffers are in the same window and we can't
   * lock the same window twice (MPI semantics) or the user has requested
   * always-copy mode. */
  else {
    void *dst_buf;

    MPI_Alloc_mem(size, MPI_INFO_NULL, &dst_buf);
    ARMCII_Assert(dst_buf != NULL);

    gmr_get(src_mreg, src, dst_buf, size, target);
    gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */

    ARMCI_Copy(dst_buf, dst, size);

    MPI_Free_mem(dst_buf);
  }

  return 0;
}
Code Example #24
File: gmr.c  Project: abhinavvishnu/matex
/** Lookup a shared memory region using an address and process id.
  *
  * @param[in] ptr  Pointer within range of the segment (e.g. base pointer).
  * @param[in] proc Process on which the data lives.
  * @return         Pointer to the mem region object.
  */
gmr_t *gmr_lookup(void *ptr, int proc) {
  gmr_t *mreg;

  mreg = gmr_list;

  while (mreg != NULL) {
    ARMCII_Assert(proc < mreg->nslices);

    if (proc < mreg->nslices) {
      const uint8_t   *base = mreg->slices[proc].base;
      const gmr_size_t size = mreg->slices[proc].size;

      if ((uint8_t*) ptr >= base && (uint8_t*) ptr < base + size)
        break;
    }

    mreg = mreg->next;
  }

  return mreg;
}
Code Example #25
File: onesided.c  Project: addy004/mpich2-yarn
/** Set the access mode for the given allocation.  Collective across the
  * allocation's group.  Waits for all processes, finishes all communication,
  * and then sets the new access mode.
  *
  * @param[in] new_mode The new access mode.
  * @param[in] ptr      Pointer within the allocation.
  * @param[in] group    Group associated with the allocation.
  * @return             Zero upon success, error code otherwise.
  */
int ARMCIX_Mode_set(int new_mode, void *ptr, ARMCI_Group *group) {
  gmr_t *mreg;

  mreg = gmr_lookup(ptr, ARMCI_GROUP_WORLD.rank);
  ARMCII_Assert_msg(mreg != NULL, "Invalid remote pointer");

  ARMCII_Assert(group->comm == mreg->group.comm);

  ARMCII_Assert_msg(mreg->lock_state != GMR_LOCK_DLA,
      "Cannot change the access mode; window is locked for local access.");
  ARMCII_Assert_msg(mreg->lock_state == GMR_LOCK_UNLOCKED,
      "Cannot change the access mode on a window that is locked.");

  // Wait for all processes to complete any outstanding communication before we
  // do the mode switch
  MPI_Barrier(mreg->group.comm);

  mreg->access_mode = new_mode;

  return 0;
}
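A sketch of how a caller might use this extension (illustrative; ARMCIX_Mode_set, ARMCIX_MODE_NO_LOAD_STORE and ARMCIX_MODE_ALL are taken from the code in this section, while the armcix.h header location is an assumption): temporarily forbid direct load/store access to an allocation during an RMA-only phase.

#include <armci.h>
#include <armcix.h>   /* assumed location of the ARMCIX extension prototypes */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv) {
  int rank, nproc;
  void **base;
  ARMCI_Group world;

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  ARMCI_Group_get_world(&world);

  base = malloc(nproc * sizeof(void*));
  ARMCI_Malloc(base, 4096);                                  /* collective */

  /* Collective: forbid direct load/store on this allocation. */
  ARMCIX_Mode_set(ARMCIX_MODE_NO_LOAD_STORE, base[rank], &world);

  /* ... RMA-only phase (ARMCI_Put/Get/Acc on base[...]) would go here ... */

  /* Collective: restore the default access mode. */
  ARMCIX_Mode_set(ARMCIX_MODE_ALL, base[rank], &world);

  ARMCI_Free(base[rank]);
  free(base);
  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}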
Code Example #26
File: buffer.c  Project: abhinavvishnu/matex
/** Finish a set of prepared buffers.  Will perform communication and copies as
  * needed to ensure results are in the original buffers.  Temporary space will be
  * freed.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[in]  new_bufs  Set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  */
void ARMCII_Buf_finish_write_vec(void **orig_bufs, void **new_bufs, int count, int size) {
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    int i;

    for (i = 0; i < count; i++) {
      if (orig_bufs[i] != new_bufs[i]) {
        gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);
        ARMCII_Assert(mreg != NULL);

        gmr_dla_lock(mreg);
        ARMCI_Copy(new_bufs[i], orig_bufs[i], size);
        // gmr_put(mreg, new_bufs[i], orig_bufs[i], size, ARMCI_GROUP_WORLD.rank);
        gmr_dla_unlock(mreg);

        MPI_Free_mem(new_bufs[i]);
      }
    }

    free(new_bufs);
  }
}
Code Example #27
File: buffer.c  Project: abhinavvishnu/matex
/** Prepare a set of buffers for use with a put operation.  The returned set of
  * buffers is guaranteed to be in private space.  Copies will be made if needed,
  * the result should be completed by finish.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs  Pointer to the set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  * @return               Number of buffers that were moved.
  */
int ARMCII_Buf_prepare_read_vec(void **orig_bufs, void ***new_bufs_ptr, int count, int size) {
  int num_moved = 0;

  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    void **new_bufs = malloc(count*sizeof(void*));
    int i;

    for (i = 0; i < count; i++)
      new_bufs[i] = NULL;

    for (i = 0; i < count; i++) {
      // Check if the source buffer is within a shared region.  If so, copy it
      // into a private buffer.
      gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);

      if (mreg != NULL) {
        MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]);
        ARMCII_Assert(new_bufs[i] != NULL);

        gmr_dla_lock(mreg);
        ARMCI_Copy(orig_bufs[i], new_bufs[i], size);
        // gmr_get(mreg, orig_bufs[i], new_bufs[i], size, ARMCI_GROUP_WORLD.rank);
        gmr_dla_unlock(mreg);

        num_moved++;
      } else {
        new_bufs[i] = orig_bufs[i];
      }
    }

    *new_bufs_ptr = new_bufs;
  }
  else {
    *new_bufs_ptr = orig_bufs;
  }
  
  return num_moved;
}
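These prepare/finish helpers are meant to be used in matched pairs around the communication call. The fragment below sketches that pattern only (it assumes the internal prototypes are in scope, and ARMCII_Buf_finish_read_vec is the read-side counterpart referenced elsewhere in this section); it is not a standalone program.

/* Sketch of the prepare/finish pairing for read-only (source) buffers. */
static void send_from_possibly_shared_bufs(void **bufs, int count, int size) {
  void **priv_bufs;
  int    i;

  /* Stage any shared source buffers into private copies (no-op under NOGUARD). */
  ARMCII_Buf_prepare_read_vec(bufs, &priv_bufs, count, size);

  for (i = 0; i < count; i++) {
    /* ... communicate out of priv_bufs[i] instead of bufs[i] ... */
  }

  /* Release the temporary copies; read-only sources need no copy-back. */
  ARMCII_Buf_finish_read_vec(bufs, priv_bufs, count, size);
}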
Code Example #28
/** General ARMCI global operation (reduction).  Collective on group.
  *
  * @param[in]    scope Scope in which to perform the GOP (only SCOPE_ALL is supported)
  * @param[inout] x     Vector of n elements of the given type; contains input and will contain output.
  * @param[in]    n     Length of x
  * @param[in]    op    One of '+', '*', 'max', 'min', 'or', 'absmax', 'absmin'
  * @param[in]    type  Data type of x
  * @param[in]    group Group on which to perform the GOP
  */
void armci_msg_group_gop_scope(int scope, void *x, int n, char *op, int type, ARMCI_Group *group) {
  void        *out;
  MPI_Op       mpi_op;
  MPI_Datatype mpi_type;
  MPI_Comm     comm;
  int          mpi_type_size;

  if (scope == SCOPE_ALL || scope == SCOPE_MASTERS)
    comm = group->comm;
  else
    comm = MPI_COMM_SELF;

  if (op[0] == '+') {
    mpi_op = MPI_SUM;
  } else if (op[0] == '*') {
    mpi_op = MPI_PROD;
  } else if (strncmp(op, "max", 3) == 0) {
    mpi_op = MPI_MAX;
  } else if (strncmp(op, "min", 3) == 0) {
    mpi_op = MPI_MIN;
  } else if (strncmp(op, "or", 2) == 0) {
    mpi_op = MPI_BOR;
  } else if (strncmp(op, "absmax", 6) == 0) {
    mpi_op = MPI_ABSMAX_OP;
  } else if (strncmp(op, "absmin", 6) == 0) {
    mpi_op = MPI_ABSMIN_OP;
  } else {
    ARMCII_Error("unknown operation \'%s\'", op);
    return;
  }

  switch(type) {
    case ARMCI_INT:
      mpi_type = MPI_INT;
      break;
    case ARMCI_LONG:
      mpi_type = MPI_LONG;
      break;
    case ARMCI_LONG_LONG:
      mpi_type = MPI_LONG_LONG;
      break;
    case ARMCI_FLOAT:
      mpi_type = MPI_FLOAT;
      break;
    case ARMCI_DOUBLE:
      mpi_type = MPI_DOUBLE;
      break;
    default:
      ARMCII_Error("unknown type (%d)", type);
      return;
  }

  // ABS MAX/MIN are unary as well as binary.  We need to also apply abs in the
  // single processor case when reduce would normally just be a no-op.
  if (group->size == 1 && (mpi_op == MPI_ABSMAX_OP || mpi_op == MPI_ABSMIN_OP)) {
    ARMCII_Absv_op(x, x, &n, &mpi_type);
    return;
  }

  MPI_Type_size(mpi_type, &mpi_type_size);

  out = malloc(n*mpi_type_size);
  ARMCII_Assert(out != NULL);

  /* Use the communicator selected for the requested scope */
  MPI_Allreduce(x, out, n, mpi_type, mpi_op, comm);

  ARMCI_Copy(out, x, n*mpi_type_size);
  free(out);
}
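A short caller sketch (illustrative; SCOPE_ALL, ARMCI_DOUBLE and the function signature are the ones used above, while the internal header that declares them is an assumption): element-wise sum of a vector of doubles over the world group.

#include <armci.h>
#include <mpi.h>
#include <stdio.h>
#include "message.h"   /* assumed internal header declaring SCOPE_ALL and the group gop */

int main(int argc, char **argv) {
  double x[4] = { 1.0, 2.0, 3.0, 4.0 };
  ARMCI_Group world;
  int rank;

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  ARMCI_Group_get_world(&world);

  /* In-place element-wise sum of x across all members of the group */
  armci_msg_group_gop_scope(SCOPE_ALL, x, 4, "+", ARMCI_DOUBLE, &world);

  if (rank == 0)
    printf("x[0] after the reduction: %.1f\n", x[0]);

  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}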
Code Example #29
File: gmr.c  Project: abhinavvishnu/matex
/** Destroy/free a shared memory region.
  *
  * @param[in] mreg  Memory region to free, or NULL if the local process passed
  *                  NULL (e.g. a zero-byte allocation) to the free call.
  * @param[in] group Group on which to perform the free.
  */
void gmr_destroy(gmr_t *mreg, ARMCI_Group *group) {
  int   search_proc_in, search_proc_out, search_proc_out_grp;
  void *search_base;
  int   alloc_me, alloc_nproc;
  int   world_me, world_nproc;

  MPI_Comm_rank(group->comm, &alloc_me);
  MPI_Comm_size(group->comm, &alloc_nproc);
  MPI_Comm_rank(ARMCI_GROUP_WORLD.comm, &world_me);
  MPI_Comm_size(ARMCI_GROUP_WORLD.comm, &world_nproc);

  /* All-to-all exchange of a <base address, proc> pair.  This is so that we
   * can support passing NULL into ARMCI_Free() which is permitted when a
   * process allocates 0 bytes.  Unfortunately, in this case we still need to
   * identify the mem region and free it.
   */

  if (mreg == NULL)
    search_proc_in = -1;
  else {
    search_proc_in = world_me;
    search_base    = mreg->slices[world_me].base;
  }

  /* Collectively decide on who will provide the base address */
  MPI_Allreduce(&search_proc_in, &search_proc_out, 1, MPI_INT, MPI_MAX, group->comm);

  /* Everyone passed NULL.  Nothing to free. */
  if (search_proc_out < 0)
    return;

  /* Translate world rank to group rank */
  search_proc_out_grp = ARMCII_Translate_absolute_to_group(group, search_proc_out);

  /* Broadcast the base address */
  MPI_Bcast(&search_base, sizeof(void*), MPI_BYTE, search_proc_out_grp, group->comm);

  /* If we were passed NULL, look up the mem region using the <base, proc> pair */
  if (mreg == NULL)
    mreg = gmr_lookup(search_base, search_proc_out);

  /* If it's still not found, the user may have passed the wrong group */
  ARMCII_Assert_msg(mreg != NULL, "Could not locate the desired allocation");

  switch (mreg->lock_state) {
    case GMR_LOCK_UNLOCKED:
      break;
    case GMR_LOCK_DLA:
      ARMCII_Warning("Releasing direct local access before freeing shared allocation\n");
      gmr_dla_unlock(mreg);
      break;
    default:
      ARMCII_Error("Unable to free locked memory region (%d)\n", mreg->lock_state);
  }

  /* Remove from the list of mem regions */
  if (mreg->prev == NULL) {
    ARMCII_Assert(gmr_list == mreg);
    gmr_list = mreg->next;

    if (mreg->next != NULL)
      mreg->next->prev = NULL;

  } else {
    mreg->prev->next = mreg->next;
    if (mreg->next != NULL)
      mreg->next->prev = mreg->prev;
  }

  /* Destroy the window and free all buffers */
  MPI_Win_free(&mreg->window);

  if (mreg->slices[world_me].base != NULL)
    MPI_Free_mem(mreg->slices[world_me].base);

  free(mreg->slices);
  ARMCIX_Destroy_mutexes_hdl(mreg->rmw_mutex);

  free(mreg);
}
Code Example #30
File: gmr.c  Project: abhinavvishnu/matex
/** Create a distributed shared memory region. Collective on ARMCI group.
  *
  * @param[in]  local_size Size of the local slice of the memory region.
  * @param[out] base_ptrs  Array of base pointers for each process in group.
  * @param[in]  group      Group on which to perform allocation.
  * @return                Pointer to the memory region object.
  */
gmr_t *gmr_create(gmr_size_t local_size, void **base_ptrs, ARMCI_Group *group) {
  int           i;
  gmr_size_t    aggregate_size;
  int           alloc_me, alloc_nproc;
  int           world_me, world_nproc;
  MPI_Group     world_group, alloc_group;
  gmr_t        *mreg;
  gmr_slice_t  *alloc_slices, gmr_slice;

  ARMCII_Assert(local_size >= 0);
  ARMCII_Assert(group != NULL);

  MPI_Comm_rank(group->comm, &alloc_me);
  MPI_Comm_size(group->comm, &alloc_nproc);
  MPI_Comm_rank(ARMCI_GROUP_WORLD.comm, &world_me);
  MPI_Comm_size(ARMCI_GROUP_WORLD.comm, &world_nproc);

  mreg = malloc(sizeof(gmr_t));
  ARMCII_Assert(mreg != NULL);

  mreg->slices = malloc(sizeof(gmr_slice_t)*world_nproc);
  ARMCII_Assert(mreg->slices != NULL);
  alloc_slices = malloc(sizeof(gmr_slice_t)*alloc_nproc);
  ARMCII_Assert(alloc_slices != NULL);

  mreg->group          = *group; /* NOTE: I think it is invalid in GA/ARMCI to
                                    free a group before its allocations.  If
                                    this is not the case, then assignment here
                                    is incorrect and this should really
                                    duplicate the group (communicator). */

  mreg->nslices        = world_nproc;
  mreg->access_mode    = ARMCIX_MODE_ALL;
  mreg->lock_state     = GMR_LOCK_UNLOCKED;
  mreg->dla_lock_count = 0;
  mreg->prev           = NULL;
  mreg->next           = NULL;

  /* Allocate my slice of the GMR */
  alloc_slices[alloc_me].size = local_size;

  if (local_size == 0) {
    alloc_slices[alloc_me].base = NULL;
  } else {
    MPI_Alloc_mem(local_size, MPI_INFO_NULL, &(alloc_slices[alloc_me].base));
    ARMCII_Assert(alloc_slices[alloc_me].base != NULL);
  }

  /* Debugging: Zero out shared memory if enabled */
  if (ARMCII_GLOBAL_STATE.debug_alloc && local_size > 0) {
    ARMCII_Assert(alloc_slices[alloc_me].base != NULL);
    ARMCII_Bzero(alloc_slices[alloc_me].base, local_size);
  }

  /* All-to-all on <base, size> to build up slices vector */
  gmr_slice = alloc_slices[alloc_me];
  MPI_Allgather(  &gmr_slice, sizeof(gmr_slice_t), MPI_BYTE,
                 alloc_slices, sizeof(gmr_slice_t), MPI_BYTE, group->comm);

  /* Check for a global size 0 allocation */
  for (i = aggregate_size = 0; i < alloc_nproc; i++) {
    aggregate_size += alloc_slices[i].size;
  }

  /* Everyone asked for 0 bytes, return a NULL vector */
  if (aggregate_size == 0) {
    free(alloc_slices);
    free(mreg->slices);
    free(mreg);

    for (i = 0; i < alloc_nproc; i++)
      base_ptrs[i] = NULL;

    return NULL;
  }

  MPI_Win_create(alloc_slices[alloc_me].base, (MPI_Aint) local_size, 1, MPI_INFO_NULL, group->comm, &mreg->window);

  /* Populate the base pointers array */
  for (i = 0; i < alloc_nproc; i++)
    base_ptrs[i] = alloc_slices[i].base;

  /* We have to do lookup on global ranks, so shovel the contents of
     alloc_slices into the mreg->slices array which is indexed by global rank. */
  memset(mreg->slices, 0, sizeof(gmr_slice_t)*world_nproc);

  MPI_Comm_group(ARMCI_GROUP_WORLD.comm, &world_group);
  MPI_Comm_group(group->comm, &alloc_group);

  for (i = 0; i < alloc_nproc; i++) {
    int world_rank;
    MPI_Group_translate_ranks(alloc_group, 1, &i, world_group, &world_rank);
    mreg->slices[world_rank] = alloc_slices[i];
  }

  free(alloc_slices);
  MPI_Group_free(&world_group);
  MPI_Group_free(&alloc_group);

  /* Create the RMW mutex: Keeps RMW operations atomic wrt each other */
  mreg->rmw_mutex = ARMCIX_Create_mutexes_hdl(1, group);

  /* Append the new region onto the region list */
  if (gmr_list == NULL) {
    gmr_list = mreg;

  } else {
    gmr_t *parent = gmr_list;

    while (parent->next != NULL)
      parent = parent->next;

    parent->next = mreg;
    mreg->prev   = parent;
  }

  return mreg;
}