예제 #1
0
/**
 * \brief ARMCI Extension non-blocking put operation.
 *
 * Inside DCMF critical section 0 this routine marks the handle's completion
 * info as having one active operation, increments the per-connection and
 * global active counters, allocates a request object and posts one DCMF_Put().
 * It returns right after posting; it does not wait for the transfer.
 *
 * \param[in] src       Source buffer on the local node
 * \param[in] dst       Destination buffer on the remote node
 * \param[in] bytes     Number of bytes to transfer
 * \param[in] proc      Remote node rank
 * \param[in] nb_handle ARMCI non-blocking handle
 *
 * \return 0 if DCMF_Put() returned DCMF_SUCCESS, 1 otherwise
 */
int ARMCIX_NbPut (void * src, void * dst, int bytes, int proc, armci_ihdl_t nb_handle)
{
  DCMF_Result status;
  armcix_dcmf_opaque_t * handle_state;

  DCMF_CriticalSection_enter (0);

  /* The connection entry for proc supplies both memory regions:
   * local is the origin of a put, remote is the target. */
  DCMF_Memregion_t * local_region  = &__connection[proc].local_mem_region;
  DCMF_Memregion_t * remote_region = &__connection[proc].remote_mem_region;

  /* Account for one outstanding operation on the handle ... */
  handle_state = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  handle_state->connection = &__connection[proc];
  handle_state->active     = 1;

  /* ... and on the per-peer and global counters. */
  __connection[proc].active++;
  __global_connection.active++;

  /* The request is allocated with a callback targeting the handle; DCMF_Put()
   * is given a completion callback that hands the request to
   * ARMCIX_DCMF_request_free.
   * NOTE(review): presumably freeing the request is what triggers
   * ARMCIX_DCMF_NbOp_cb_done -- confirm in ARMCIX_DCMF_request_free. */
  DCMF_Callback_t release_cb = { ARMCIX_DCMF_NbOp_cb_done, nb_handle };
  ARMCIX_DCMF_Request_t * req = ARMCIX_DCMF_request_allocate (release_cb);
  DCMF_Callback_t complete_cb = { (void(*)(void *)) ARMCIX_DCMF_request_free, req };

  /* NOTE(review): if the post fails, the counters bumped above are not rolled
   * back here -- confirm whether callers rely on the return code for that. */
  status = DCMF_Put (&__put_protocol,
                     &(req->request),
                     complete_cb,
                     DCMF_SEQUENTIAL_CONSISTENCY,
                     proc,
                     bytes,
                     local_region,
                     remote_region,
                     armcix_dcmf_va_to_offset (local_region, src),
                     armcix_dcmf_va_to_offset (remote_region, dst));

  DCMF_CriticalSection_exit  (0);

  return (status == DCMF_SUCCESS) ? 0 : 1;
}
예제 #2
0
/**
 * \brief ARMCI Extension blocking put operation.
 *
 * Posts one DCMF_Put() from a stack-allocated request inside DCMF critical
 * section 0 and then blocks: either by calling ARMCIX_Fence(proc) when
 * BLOCKING_OPERATIONS_REQUIRE_FENCE is defined, or by advancing the messager
 * until the completion callback clears the local \c active flag.
 *
 * \param[in] src       Source buffer on the local node
 * \param[in] dst       Destination buffer on the remote node
 * \param[in] bytes     Number of bytes to transfer
 * \param[in] proc      Remote node rank
 *
 * \return 0 if DCMF_Put() returned DCMF_SUCCESS, 1 otherwise
 */
int ARMCIX_Put( void * src, void * dst, int bytes, int proc)
{
  DCMF_CriticalSection_enter (0);

  /* Completion flag handed to ARMCIX_DCMF_cb_decrement as client data; the
   * wait loop below polls it until it reaches zero. */
  volatile unsigned active = 1;
  DCMF_Callback_t cb_wait = { ARMCIX_DCMF_cb_decrement, (void *)&active };
  DCMF_Request_t request;

  DCMF_Memregion_t * src_memregion = &__connection[proc].local_mem_region;
  DCMF_Memregion_t * dst_memregion = &__connection[proc].remote_mem_region;

  DCMF_Result result =
    DCMF_Put (&__put_protocol,
              &request,
              cb_wait,
              DCMF_SEQUENTIAL_CONSISTENCY,
              proc,
              bytes,
              src_memregion,
              dst_memregion,
              armcix_dcmf_va_to_offset (src_memregion, src),
              armcix_dcmf_va_to_offset (dst_memregion, dst));

#ifdef BLOCKING_OPERATIONS_REQUIRE_FENCE
  ARMCIX_Fence (proc);
#else
  /* Only wait when the put was actually posted. A rejected put is assumed
   * never to invoke cb_wait, in which case polling 'active' would spin
   * forever and the error return below could never be reached.
   * NOTE(review): confirm against the DCMF_Put contract. */
  if (result == DCMF_SUCCESS)
  {
    while (active) DCMF_Messager_advance ();
  }
#endif

  DCMF_CriticalSection_exit  (0);

  return (result != DCMF_SUCCESS);
}
예제 #3
0
/**
 * \brief ARMCI Extension non-blocking vector get operation.
 *
 * Posts one DCMF_Get() for every (source, destination) pointer pair in every
 * descriptor, each transferring that descriptor's \c bytes. Before posting,
 * the handle's completion info and the per-connection and global active
 * counters are charged with the total number of transfers. All work is done
 * inside DCMF critical section 0; the routine does not wait for completion.
 *
 * \param[in] darr      Descriptor array
 * \param[in] len       Length of descriptor array
 * \param[in] proc      Remote process(or) ID
 * \param[in] nb_handle ARMCI non-blocking handle
 *
 * \return 0 only if at least one get was posted and every DCMF_Get() returned
 *         DCMF_SUCCESS; 1 otherwise. An empty descriptor list therefore
 *         reports 1, exactly as it did before.
 */
int ARMCIX_NbGetV (armci_giov_t * darr, int len, int proc, armci_ihdl_t nb_handle)
{
  DCMF_Result result = DCMF_ERROR;
  unsigned failures = 0;   /* posts that did not return DCMF_SUCCESS */

  DCMF_CriticalSection_enter (0);

  // Calculate the number of requests: one per pointer pair.
  // The index is signed like 'len' so a negative length yields zero
  // iterations instead of wrapping to a huge unsigned bound.
  unsigned n = 0;
  int i;
  unsigned j;
  for (i = 0; i < len; i++)
    n += darr[i].ptr_array_len;

  armcix_dcmf_opaque_t * dcmf = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  dcmf->connection = &__connection[proc];
  dcmf->active = n;

  __connection[proc].active += n;
  __global_connection.active += n;

  // For a get the remote region is the source and the local one the target.
  DCMF_Memregion_t * src_memregion = &__connection[proc].remote_mem_region;
  DCMF_Memregion_t * dst_memregion = &__connection[proc].local_mem_region;

  DCMF_Callback_t cb_free = { ARMCIX_DCMF_NbOp_cb_done, nb_handle };
  DCMF_Callback_t cb_done = { (void(*)(void *)) ARMCIX_DCMF_request_free, NULL };
  for (i = 0; i < len; i++)
  {
    for (j = 0; j < darr[i].ptr_array_len; j++)
    {
      // Each transfer gets its own request; the completion callback's client
      // data is re-pointed at it before the get is posted.
      ARMCIX_DCMF_Request_t * new_request = ARMCIX_DCMF_request_allocate (cb_free);
      cb_done.clientdata = new_request;

      result =
        DCMF_Get (&__get_protocol,
                  &(new_request->request),
                  cb_done,
                  DCMF_SEQUENTIAL_CONSISTENCY,
                  proc,
                  darr[i].bytes,
                  src_memregion,
                  dst_memregion,
                  armcix_dcmf_va_to_offset (src_memregion, darr[i].src_ptr_array[j]),
                  armcix_dcmf_va_to_offset (dst_memregion, darr[i].dst_ptr_array[j]));

      // Remember every failure; 'result' alone only reflects the last post
      // and would hide an earlier error behind a later success.
      // NOTE(review): the counters above were already charged for this
      // transfer; if a failed get never fires its callback the handle will
      // not drain -- confirm and add a rollback if so.
      if (result != DCMF_SUCCESS)
        failures++;
    }
  }

  DCMF_CriticalSection_exit  (0);

  return (failures != 0) || (result != DCMF_SUCCESS);
}
예제 #4
0
/**
 * \brief Recursive helper that posts the DCMF_Put() calls of a strided put.
 *
 * At stride level 0 a single put of \c seg_count[0] bytes is posted from
 * \c src_ptr to \c dst_ptr. At a higher level the routine recurses
 * \c seg_count[stride_levels] times into the next lower level, advancing the
 * source and destination addresses by \c src_stride_arr[stride_levels-1] and
 * \c dst_stride_arr[stride_levels-1] bytes after each step.
 *
 * The routine does not enter a critical section and does not touch any
 * active counters itself.
 *
 * \param[in] src_ptr        Local source address for this sub-block
 * \param[in] src_stride_arr Source strides, indexed by stride level
 * \param[in] dst_ptr        Remote destination address for this sub-block
 * \param[in] dst_stride_arr Destination strides, indexed by stride level
 * \param[in] seg_count      Segment counts; element 0 is the byte count of
 *                           one contiguous put
 * \param[in] stride_levels  Stride level handled by this call
 * \param[in] proc           Remote node rank
 * \param[in] nb_handle      ARMCI non-blocking handle
 *
 * \return Number of DCMF_Put() calls posted by this call and its recursion
 */
unsigned ARMCIX_DCMF_PutS_recurse (void * src_ptr, int * src_stride_arr,
                                   void * dst_ptr, int * dst_stride_arr,
                                   int * seg_count, int stride_levels, int proc,
                                   armci_ihdl_t nb_handle)
{
  if (stride_levels != 0)
  {
    /* Interior level: walk the segments of this level and let the next lower
     * level post the actual transfers. */
    const int inner_level = stride_levels - 1;
    unsigned posted = 0;
    char * src_cursor = (char *) src_ptr;
    char * dst_cursor = (char *) dst_ptr;
    unsigned k;

    for (k = 0; k < seg_count[stride_levels]; k++)
    {
      posted += ARMCIX_DCMF_PutS_recurse (src_cursor, src_stride_arr,
                                          dst_cursor, dst_stride_arr,
                                          seg_count, inner_level, proc,
                                          nb_handle);

      src_cursor += src_stride_arr[inner_level];
      dst_cursor += dst_stride_arr[inner_level];
    }

    return posted;
  }

  /* Leaf level: one contiguous segment, one put. */
  DCMF_Memregion_t * local_region  = &__connection[proc].local_mem_region;
  DCMF_Memregion_t * remote_region = &__connection[proc].remote_mem_region;

  DCMF_Callback_t release_cb = { ARMCIX_DCMF_NbOp_cb_done, nb_handle };
  ARMCIX_DCMF_Request_t * req = ARMCIX_DCMF_request_allocate (release_cb);
  DCMF_Callback_t complete_cb = { (void(*)(void *)) ARMCIX_DCMF_request_free, req };

  /* The result of DCMF_Put() is not inspected; the put is counted as posted
   * regardless. */
  DCMF_Put (&__put_protocol,
            &(req->request),
            complete_cb,
            DCMF_SEQUENTIAL_CONSISTENCY,
            proc,
            seg_count[0],
            local_region,
            remote_region,
            armcix_dcmf_va_to_offset (local_region, src_ptr),
            armcix_dcmf_va_to_offset (remote_region, dst_ptr));

  return 1;
}