示例#1
0
/*
 * Send a CH3 packet header plus the packed payload of a noncontiguous request.
 *
 * The header is copied into sreq->dev.pending_pkt and becomes IOV entry 0.
 * When sreq->dev.segment_size is non-zero, the whole segment is packed into a
 * newly allocated sreq->dev.tmpbuf, which becomes IOV entry 1.  The IOV is
 * then handed to _mxm_isend() as an MXM_MPICH_ISEND_AM message.
 *
 * vc      - virtual connection the message is sent on
 * sreq    - send request; segment_ptr/segment_size describe the payload
 * hdr     - packet header; sizeof(MPIDI_CH3_Pkt_t) bytes are copied from it
 * hdr_sz  - header size, asserted to be <= sizeof(MPIDI_CH3_Pkt_t)
 *
 * Returns MPI_SUCCESS, or an MPI error code if the pack buffer cannot be
 * allocated or _mxm_isend() fails.
 *
 * NOTE(review): sreq->dev.tmpbuf is not released here; presumably the send
 * completion path frees it -- confirm.
 */
int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
                               MPIDI_msg_sz_t hdr_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t last;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "MPID_nem_mxm_iSendNoncontig");

    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));

    /* The sizes are cast so that the arguments match the %d conversions. */
    _dbg_mxm_output(5,
                    "SendNoncontig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, sreq, (int) sizeof(MPIDI_CH3_Pkt_t),
                    (int) sreq->dev.segment_size);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    /* IOV entry 0 always carries the packet header. */
    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 1;
    req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);

    MPIU_Assert(sreq->dev.segment_first == 0);
    last = sreq->dev.segment_size;
    if (last > 0) {
        sreq->dev.tmpbuf = MPIU_Malloc((size_t) sreq->dev.segment_size);
        /* A real check: MPIU_Assert alone vanishes in non-debug builds and the
         * NULL buffer would then be written to by MPID_Segment_pack(). */
        MPIU_ERR_CHKANDJUMP1((sreq->dev.tmpbuf == NULL), mpi_errno, MPI_ERR_OTHER,
                             "**nomem", "**nomem %s", "noncontig send pack buffer");
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.tmpbuf);
        MPIU_Assert(last == sreq->dev.segment_size);

        /* IOV entry 1 carries the packed payload. */
        req_area->iov_count = 2;
        req_area->iov_buf[1].ptr = sreq->dev.tmpbuf;
        req_area->iov_buf[1].length = last;
    }

    vc_area->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = TRUE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
示例#2
0
/*
 * Start a nonblocking synchronous send of user data over MXM.
 *
 * A send request is created and initialised, a one-entry IOV describing the
 * data is built (pointing straight at the user buffer for contiguous
 * datatypes, or at a freshly packed copy in sreq->dev.tmpbuf otherwise), and
 * the IOV is posted with _mxm_isend() using MXM_MPICH_ISEND_SYNC.
 *
 * vc              - virtual connection to the destination
 * buf/count/datatype - user data to send
 * rank, tag       - destination rank (used for debug output) and message tag
 * comm            - communicator; supplies the MXM queue, local rank and context id
 * context_offset  - added to comm->context_id to form the matching context
 * sreq_ptr        - [out] receives the request (also set on the error path)
 *
 * Returns MPI_SUCCESS or an MPI error code.
 *
 * NOTE(review): sreq->dev.tmpbuf and sreq->dev.segment_ptr are not released
 * here; presumably the completion path frees them -- confirm.
 */
int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatype datatype,
                        int rank, int tag, MPID_Comm * comm, int context_offset,
                        MPID_Request ** sreq_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = NULL;
    MPID_Datatype *dt_ptr;
    int dt_contig;
    MPIDI_msg_sz_t data_sz;
    MPI_Aint dt_true_lb;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISSEND);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISSEND);

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    /* create a request */
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIU_Assert(sreq != NULL);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);
    /* Hold a reference on derived datatypes for the lifetime of the request. */
    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        MPID_Datatype_get_ptr(datatype, sreq->dev.datatype_ptr);
        MPID_Datatype_add_ref(sreq->dev.datatype_ptr);
    }
    sreq->partner_request = NULL;
    sreq->dev.OnDataAvail = NULL;
    sreq->dev.tmpbuf = NULL;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    /* data_sz is cast so that the argument matches the %d conversion. */
    _dbg_mxm_output(5,
                    "isSend ========> Sending USER msg for req %p (context %d to %d tag %d size %d) \n",
                    sreq, comm->context_id + context_offset, rank, tag, (int) data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    /* Start from an empty IOV; it stays empty for zero-byte messages. */
    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 0;
    req_area->iov_buf[0].ptr = NULL;
    req_area->iov_buf[0].length = 0;

    if (data_sz) {
        if (dt_contig) {
            /* Contiguous data is sent directly from the user buffer. */
            req_area->iov_count = 1;
            req_area->iov_buf[0].ptr = (char *) (buf) + dt_true_lb;
            req_area->iov_buf[0].length = data_sz;
        }
        else {
            MPIDI_msg_sz_t last;
            MPI_Aint packsize = 0;

            sreq->ch.noncontig = TRUE;
            sreq->dev.segment_ptr = MPID_Segment_alloc();
            MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                                 "**nomem", "**nomem %s", "MPID_Segment_alloc");
            MPIR_Pack_size_impl(count, datatype, &packsize);

            last = data_sz;
            if (packsize > 0) {
                sreq->dev.tmpbuf = MPIU_Malloc((size_t) packsize);
                /* A real check: MPIU_Assert alone vanishes in non-debug builds
                 * and the NULL buffer would then be written to while packing. */
                MPIU_ERR_CHKANDJUMP1((sreq->dev.tmpbuf == NULL), mpi_errno, MPI_ERR_OTHER,
                                     "**nomem", "**nomem %s", "noncontig send pack buffer");
                MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
                MPID_Segment_pack(sreq->dev.segment_ptr, 0, &last, sreq->dev.tmpbuf);

                req_area->iov_count = 1;
                req_area->iov_buf[0].ptr = sreq->dev.tmpbuf;
                req_area->iov_buf[0].length = last;
            }
        }
    }

    vc_area->pending_sends += 1;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_SYNC,
                           (mxm_mq_h) comm->dev.ch.netmod_priv, comm->rank, tag,
                           _mxm_tag_mpi2mxm(tag, comm->context_id + context_offset),
                           0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    _dbg_mxm_out_req(sreq);

  fn_exit:
    /* The request is handed back on both the success and the error path. */
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISSEND);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
示例#3
0
/**
 * \brief MPID buffer copy
 *
 * Copies typed data from a source buffer to a destination buffer, handling
 * every combination of contiguous / non-contiguous source and destination
 * datatypes.  If the source holds more bytes than the destination can take,
 * the copy is truncated to the destination size and MPI_ERR_TRUNCATE is
 * reported through \a rmpi_errno.
 *
 * \param[in]  sbuf       The address of the input buffer
 * \param[in]  scount     The number of elements in that buffer
 * \param[in]  sdt        The datatype of those elements
 * \param[out] smpi_errno Returns errors
 * \param[in]  rbuf       The address of the output buffer
 * \param[in]  rcount     The number of elements in that buffer
 * \param[in]  rdt        The datatype of those elements
 * \param[out] rsz        The size of the output data, in bytes
 * \param[out] rmpi_errno Returns errors
 */
void MPIDI_Buffer_copy(
    const void * const sbuf, MPI_Aint scount, MPI_Datatype sdt,                       int * smpi_errno,
          void * const rbuf, MPI_Aint rcount, MPI_Datatype rdt, MPIDI_msg_sz_t * rsz, int * rmpi_errno)
{
    int sdt_contig;
    int rdt_contig;
    MPI_Aint sdt_true_lb, rdt_true_lb;
    MPIDI_msg_sz_t sdata_sz;
    MPIDI_msg_sz_t rdata_sz;
    MPID_Datatype * sdt_ptr;
    MPID_Datatype * rdt_ptr;

    *smpi_errno = MPI_SUCCESS;
    *rmpi_errno = MPI_SUCCESS;

    /* printf("bufcopy: src count=%d dt=%d\n", scount, sdt); */
    /* printf("bufcopy: dst count=%d dt=%d\n", rcount, rdt); */

    MPIDI_Datatype_get_info(scount, sdt, sdt_contig, sdata_sz, sdt_ptr, sdt_true_lb);
    MPIDI_Datatype_get_info(rcount, rdt, rdt_contig, rdata_sz, rdt_ptr, rdt_true_lb);

    /* --BEGIN ERROR HANDLING-- */
    if (sdata_sz > rdata_sz)
    {
        /* Report truncation but still copy as much as fits. */
        *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz );
        sdata_sz = rdata_sz;
    }
    /* --END ERROR HANDLING-- */

    if (sdata_sz == 0)
    {
        *rsz = 0;
        goto fn_exit;
    }

    if (sdt_contig && rdt_contig)
    {
        /* Both sides contiguous: a single flat copy. */
#if CUDA_AWARE_SUPPORT
      if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rbuf))
      {
        /* NOTE(review): the CudaMemcpy result is not checked. */
        cudaError_t cudaerr = CudaMemcpy((char *)rbuf + rdt_true_lb, (const char *)sbuf + sdt_true_lb, sdata_sz, cudaMemcpyHostToDevice);
        (void) cudaerr;
      }
      else
#endif
      {
        memcpy((char*)rbuf + rdt_true_lb, (const char *)sbuf + sdt_true_lb, sdata_sz);
      }
        *rsz = sdata_sz;
    }
    else if (sdt_contig)
    {
        /* Contiguous source, non-contiguous destination: unpack. */
#if CUDA_AWARE_SUPPORT
      /* This will need to be done in two steps:
       * 1 - Allocate a temp buffer which is the same size as user buffer and unpack in it.
       * 2 - Copy unpacked data into user buffer from temp buffer. */
      if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rbuf))
      {
        MPI_Aint rdt_extent;
        MPID_Datatype_get_extent_macro(rdt, rdt_extent);
        char *buf =  MPL_malloc(rdt_extent * rcount);
        /* --BEGIN ERROR HANDLING-- */
        if (buf == NULL)
        {
            /* Without this check the memset below would write through NULL. */
            *smpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, __FUNCTION__, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            *rmpi_errno = *smpi_errno;
            *rsz = 0;
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */
        memset(buf, 0, rdt_extent * rcount);
        MPID_Segment seg;
        DLOOP_Offset last;

        MPID_Segment_init(buf, rcount, rdt, &seg, 0);
        last = sdata_sz;
        MPID_Segment_unpack(&seg, 0, &last, (char*)sbuf + sdt_true_lb);
        /* --BEGIN ERROR HANDLING-- */
        if (last != sdata_sz)
        {
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

       *rsz = last;

        /* NOTE(review): the CudaMemcpy result is not checked. */
        cudaError_t cudaerr = CudaMemcpy((char *)rbuf + rdt_true_lb, buf, rdt_extent * rcount, cudaMemcpyHostToDevice);
        (void) cudaerr;

        MPL_free(buf);

        goto fn_exit;

      }
#endif

        MPID_Segment seg;
        DLOOP_Offset last;

        MPID_Segment_init(rbuf, rcount, rdt, &seg, 0);
        last = sdata_sz;
        MPID_Segment_unpack(&seg, 0, &last, (char*)sbuf + sdt_true_lb);
        /* --BEGIN ERROR HANDLING-- */
        if (last != sdata_sz)
        {
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

        *rsz = last;
    }
    else if (rdt_contig)
    {
        /* Non-contiguous source, contiguous destination: pack. */
        MPID_Segment seg;
        DLOOP_Offset last;

        MPID_Segment_init(sbuf, scount, sdt, &seg, 0);
        last = sdata_sz;
        MPID_Segment_pack(&seg, 0, &last, (char*)rbuf + rdt_true_lb);
        /* --BEGIN ERROR HANDLING-- */
        if (last != sdata_sz)
        {
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

        *rsz = last;
    }
    else
    {
        /* Both sides non-contiguous: stream through a bounded staging buffer,
         * packing from the source and unpacking into the destination. */
        char * buf;
        MPIDI_msg_sz_t buf_off;
        MPID_Segment sseg;
        MPIDI_msg_sz_t sfirst;
        MPID_Segment rseg;
        MPIDI_msg_sz_t rfirst;

        buf = MPL_malloc(MPIDI_COPY_BUFFER_SZ);
        /* --BEGIN ERROR HANDLING-- */
        if (buf == NULL)
        {
            *smpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, __FUNCTION__, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            *rmpi_errno = *smpi_errno;
            *rsz = 0;
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        MPID_Segment_init(sbuf, scount, sdt, &sseg, 0);
        MPID_Segment_init(rbuf, rcount, rdt, &rseg, 0);

        sfirst = 0;
        rfirst = 0;
        buf_off = 0;

        for(;;)
        {
            DLOOP_Offset last;
            char * buf_end;

            /* Pack as much as fits behind the bytes left over from the
             * previous iteration (buf_off of them). */
            if (sdata_sz - sfirst > MPIDI_COPY_BUFFER_SZ - buf_off)
            {
                last = sfirst + (MPIDI_COPY_BUFFER_SZ - buf_off);
            }
            else
            {
                last = sdata_sz;
            }

            MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
            /* --BEGIN ERROR HANDLING-- */
            MPID_assert(last > sfirst);
            /* --END ERROR HANDLING-- */

            buf_end = buf + buf_off + (last - sfirst);
            sfirst = last;

            MPID_Segment_unpack(&rseg, rfirst, &last, buf);
            /* --BEGIN ERROR HANDLING-- */
            MPID_assert(last > rfirst);
            /* --END ERROR HANDLING-- */

            rfirst = last;

            if (rfirst == sdata_sz)
            {
                /* successful completion */
                break;
            }

            /* --BEGIN ERROR HANDLING-- */
            if (sfirst == sdata_sz)
            {
                /* datatype mismatch -- remaining bytes could not be unpacked */
                *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
                break;
            }
            /* --END ERROR HANDLING-- */

            /* Move the packed-but-not-yet-unpacked tail to the buffer start. */
            buf_off = sfirst - rfirst;
            if (buf_off > 0)
            {
                memmove(buf, buf_end - buf_off, buf_off);
            }
        }

        *rsz = rfirst;
        MPL_free(buf);
    }

  fn_exit:
    return;
}
示例#4
0
/*
 * Fill an IOV with the next portion of a send request's data.
 *
 * The request's segment is first described directly as IOV entries.  Three
 * outcomes are possible:
 *   - everything that remained fits: OnDataAvail becomes OnFinal;
 *   - only part fits but the entries are dense enough (average bytes per
 *     entry >= MPIDI_IOV_DENSITY_MIN): segment_first advances and OnDataAvail
 *     becomes the reload handler;
 *   - the entries are too sparse: the data is staged through the request's
 *     send/receive buffer (SRBuf) and sent as a single IOV entry.
 *
 * sreq   - send request whose segment is being sent
 * iov    - IOV array to fill
 * iov_n  - [in] capacity of iov, [out] number of entries actually used
 *
 * Returns MPI_SUCCESS, or an error code if the SRBuf cannot be allocated
 * (also stored in sreq->status.MPI_ERROR).
 */
int MPIDI_CH3U_Request_load_send_iov(MPID_Request * const sreq, 
				     MPID_IOV * const iov, int * const iov_n)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint last;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
    MPIU_Assert(sreq->dev.segment_ptr != NULL);
    last = sreq->dev.segment_size;
    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
     "pre-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
		      sreq->dev.segment_first, last, *iov_n));
    MPIU_Assert(sreq->dev.segment_first < last);
    MPIU_Assert(last > 0);
    MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT);
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, 
			     &last, iov, iov_n);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
    "post-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
		      sreq->dev.segment_first, last, *iov_n));
    MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT);

    /* Case 1: the IOV already describes everything that was left. */
    if (last == sreq->dev.segment_size)
    {
	MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data loaded into IOV");
	sreq->dev.OnDataAvail = sreq->dev.OnFinal;
	goto fn_exit;
    }

    /* Case 2: partial load, but each entry carries enough bytes on average. */
    if ((last - sreq->dev.segment_first) / *iov_n >= MPIDI_IOV_DENSITY_MIN)
    {
	MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data loaded into IOV");
	sreq->dev.segment_first = last;
	sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
	goto fn_exit;
    }

    /* Case 3: sparse IOV; gather the data into the SRBuf instead. */
    {
	MPIDI_msg_sz_t remaining_sz;
	int entry_idx, staged_bytes;

	MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"low density.  using SRBuf.");

	remaining_sz = sreq->dev.segment_size - sreq->dev.segment_first;
	if (!MPIDI_Request_get_srbuf_flag(sreq))
	{
	    MPIDI_CH3U_SRBuf_alloc(sreq, remaining_sz);
	    /* --BEGIN ERROR HANDLING-- */
	    if (sreq->dev.tmpbuf_sz == 0)
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure");
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
                                FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 
						 "**nomem %d", remaining_sz);
		sreq->status.MPI_ERROR = mpi_errno;
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}

	/* Copy what the IOV already describes to the front of the SRBuf. */
	staged_bytes = 0;
	entry_idx = 0;
	while (entry_idx < *iov_n)
	{
	    MPIU_Memcpy((char*) sreq->dev.tmpbuf + staged_bytes, 
		   iov[entry_idx].MPID_IOV_BUF, iov[entry_idx].MPID_IOV_LEN);
	    staged_bytes += iov[entry_idx].MPID_IOV_LEN;
	    entry_idx++;
	}
	sreq->dev.segment_first = last;

	/* Pack further data behind it, limited by the free SRBuf space. */
	if (remaining_sz <= sreq->dev.tmpbuf_sz - staged_bytes)
	{
	    last = sreq->dev.segment_size;
	}
	else
	{
	    last = sreq->dev.segment_first + sreq->dev.tmpbuf_sz - staged_bytes;
	}
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
               "pre-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
			  sreq->dev.segment_first, last));
	MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, 
			  &last, (char*) sreq->dev.tmpbuf + staged_bytes);
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
              "post-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
			   sreq->dev.segment_first, last));

	/* The whole SRBuf content goes out as one IOV entry. */
	iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)sreq->dev.tmpbuf;
	iov[0].MPID_IOV_LEN = last - sreq->dev.segment_first + staged_bytes;
	*iov_n = 1;

	if (last != sreq->dev.segment_size)
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data packed into SRBuf");
	    sreq->dev.segment_first = last;
	    sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
	}
	else
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data packed into SRBuf");
	    sreq->dev.OnDataAvail = sreq->dev.OnFinal;
	}
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
    return mpi_errno;
}
示例#5
0
/*
 * Gatherv built on the PAMI gatherv_int collective.
 *
 * Non-contiguous send data is packed into a temporary byte buffer first.  On
 * the root, if the receive datatype cannot be mapped to a PAMI type, the data
 * is gathered as bytes into a temporary staging buffer (with byte counts and
 * byte displacements computed here) and copied into recvbuf afterwards.
 *
 * The collective-selection advisor, when available, may redirect the call to
 * MPIR_Gatherv() or request a barrier for flow control.
 *
 * Parameters follow MPI_Gatherv; mpierrno is only forwarded to MPIR_Gatherv().
 * Returns MPI_SUCCESS, the result of MPIR_Gatherv() when the call is
 * redirected, or -1 if MPI_IN_PLACE is used without PAMI_IN_PLACE support.
 */
int MPIDO_Gatherv_simple(const void *sendbuf, 
                  int sendcount, 
                  MPI_Datatype sendtype,
                  void *recvbuf, 
                  const int *recvcounts, 
                  const int *displs, 
                  MPI_Datatype recvtype,
                  int root, 
                  MPID_Comm * comm_ptr, 
                  int *mpierrno)

{
#ifndef HAVE_PAMI_IN_PLACE
  if (sendbuf == MPI_IN_PLACE)
  {
    MPID_Abort (NULL, 0, 1, "'MPI_IN_PLACE' requries support for `PAMI_IN_PLACE`");
    return -1;
  }
#endif
   TRACE_ERR("Entering MPIDO_Gatherv_optimized\n");
   int snd_contig = 1, rcv_contig = 1;
   void *snd_noncontig_buff = NULL, *rcv_noncontig_buff = NULL;
   void *sbuf = NULL, *rbuf = NULL;
   int  *rcounts = NULL;
   int  *rdispls = NULL;
   int send_size = 0;
   int recv_size = 0;
   int rcvlen    = 0;
   int totalrecvcount  = 0;
   pami_type_t rtype = PAMI_TYPE_NULL;
   MPID_Segment segment;
   MPID_Datatype *data_ptr = NULL;
   int send_true_lb = 0, recv_true_lb = 0;
   int i, tmp;
   volatile unsigned gatherv_active = 1;
   const int rank = comm_ptr->rank;
   const int size = comm_ptr->local_size;
#if ASSERT_LEVEL==0
   /* We can't afford the tracing in ndebug/performance libraries */
    const unsigned verbose = 0;
#else
    const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0);
#endif

   const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
   int recvok=PAMI_SUCCESS, recvcontinuous=0;

   if(sendbuf != MPI_IN_PLACE)
   {
     MPIDI_Datatype_get_info(sendcount, sendtype, snd_contig,
                             send_size, data_ptr, send_true_lb);
     if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
     {
       advisor_algorithm_t advisor_algorithms[1];
       int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_GATHERV_INT, 64, advisor_algorithms, 1);
       if(num_algorithms)
       {
         if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
         {
           return MPIR_Gatherv(sendbuf, sendcount, sendtype,
                               recvbuf, recvcounts, displs, recvtype,
                               root, comm_ptr, mpierrno);
         }
         else if(advisor_algorithms[0].metadata && advisor_algorithms[0].metadata->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
         {
           /* Flow control: the request budget is used up, so refill it and
            * synchronise with a barrier. */
           comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
           int tmpmpierrno;
           if(unlikely(verbose))
             fprintf(stderr,"Query barrier required for %s\n", advisor_algorithms[0].metadata->name);
           MPIDO_Barrier(comm_ptr, &tmpmpierrno);
         }
       }
     }

     sbuf = (char *)sendbuf + send_true_lb;
     if(!snd_contig)
     {
       /* Pack non-contiguous send data into a flat byte buffer. */
       snd_noncontig_buff = MPL_malloc(send_size);
       sbuf = snd_noncontig_buff;
       if(snd_noncontig_buff == NULL)
       {
         MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
                    "Fatal:  Cannot allocate pack buffer");
       }
       DLOOP_Offset last = send_size;
       MPID_Segment_init(sendbuf, sendcount, sendtype, &segment, 0);
       MPID_Segment_pack(&segment, 0, &last, snd_noncontig_buff);
     }
   }
   else
   {
     MPIDI_Datatype_get_info(1, recvtype, rcv_contig,
                             rcvlen, data_ptr, recv_true_lb);
     if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
     {
       advisor_algorithm_t advisor_algorithms[1];
       int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_GATHERV_INT, 64, advisor_algorithms, 1);
       if(num_algorithms)
       {
         if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
         {
           return MPIR_Gatherv(sendbuf, sendcount, sendtype,
                               recvbuf, recvcounts, displs, recvtype,
                               root, comm_ptr, mpierrno);
         }
         else if(advisor_algorithms[0].metadata && advisor_algorithms[0].metadata->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
         {
           comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
           int tmpmpierrno;
           if(unlikely(verbose))
             fprintf(stderr,"Query barrier required for %s\n", advisor_algorithms[0].metadata->name);
           MPIDO_Barrier(comm_ptr, &tmpmpierrno);
         }
       }
     }
   }

   pami_xfer_t gatherv;
   rbuf = (char *)recvbuf + recv_true_lb;
   rcounts = (int*)recvcounts;
   rdispls = (int*)displs;
   if(rank == root)
   {
     if((recvok = MPIDI_Datatype_to_pami(recvtype, &rtype, -1, NULL, &tmp)) != MPI_SUCCESS)
     {
       /* The receive type has no PAMI equivalent: gather raw bytes into a
        * staging buffer, using byte counts/displacements built here. */
       MPIDI_Datatype_get_info(1, recvtype, rcv_contig,
                               rcvlen, data_ptr, recv_true_lb);
       totalrecvcount = recvcounts[0];
       recvcontinuous = displs[0] == 0? 1 : 0 ;
       /* One int per rank; the element size must be part of the request. */
       rcounts = (int*)MPL_malloc((size_t) size * sizeof(int));
       rdispls = (int*)MPL_malloc((size_t) size * sizeof(int));
       if(rcounts == NULL || rdispls == NULL)
       {
          MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
             "Fatal:  Cannot allocate count/displacement buffers");
       }
       rdispls[0] = 0;
       rcounts[0] = rcvlen * recvcounts[0];
       for(i = 1; i < size; i++)
       {
         rdispls[i]= rcvlen * totalrecvcount;
         totalrecvcount += recvcounts[i];
         /* Remember whether the caller's layout is gap-free and in rank
          * order; if so one bulk copy suffices afterwards. */
         if(displs[i] != (displs[i-1] + recvcounts[i-1]))
           recvcontinuous = 0;
         rcounts[i] = rcvlen * recvcounts[i];
       }
       recv_size = rcvlen * totalrecvcount;

       rcv_noncontig_buff = MPL_malloc(recv_size);
       rbuf = rcv_noncontig_buff;
       rtype = PAMI_TYPE_BYTE;
       if(rcv_noncontig_buff == NULL)
       {
          MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
             "Fatal:  Cannot allocate pack buffer");
       }
       if(sendbuf == MPI_IN_PLACE)
       {
         /* The root's own contribution already lives in recvbuf; move it
          * into the staging buffer. */
         size_t extent;
         MPID_Datatype_get_extent_macro(recvtype,extent);
         MPIR_Localcopy((char *)recvbuf + displs[rank]*extent, recvcounts[rank], recvtype,
                      (char *)rcv_noncontig_buff + rdispls[rank], rcounts[rank],MPI_CHAR);
       }
     }
     if(sendbuf == MPI_IN_PLACE)
     {
       gatherv.cmd.xfer_gatherv_int.sndbuf = PAMI_IN_PLACE;
     }
     else
     {
       gatherv.cmd.xfer_gatherv_int.sndbuf = sbuf;
     }
     gatherv.cmd.xfer_gatherv_int.stype = PAMI_TYPE_BYTE;/* stype is ignored when sndbuf == PAMI_IN_PLACE */
     gatherv.cmd.xfer_gatherv_int.stypecount = send_size;

   }
   else
   {
     gatherv.cmd.xfer_gatherv_int.sndbuf = sbuf;
     gatherv.cmd.xfer_gatherv_int.stype = PAMI_TYPE_BYTE;
     gatherv.cmd.xfer_gatherv_int.stypecount = send_size;
   }


   gatherv.cb_done = cb_gatherv;
   gatherv.cookie = (void *)&gatherv_active;
   gatherv.cmd.xfer_gatherv_int.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0);
   gatherv.cmd.xfer_gatherv_int.rcvbuf = rbuf;
   gatherv.cmd.xfer_gatherv_int.rtype = rtype;
   gatherv.cmd.xfer_gatherv_int.rtypecounts = (int *) rcounts;
   gatherv.cmd.xfer_gatherv_int.rdispls = (int *) rdispls;


   const pami_metadata_t *my_gatherv_md;

   gatherv.algorithm = mpid->coll_algorithm[PAMI_XFER_GATHERV_INT][0][0];
   my_gatherv_md = &mpid->coll_metadata[PAMI_XFER_GATHERV_INT][0][0];

   MPIDI_Update_last_algorithm(comm_ptr, my_gatherv_md->name);

   MPIDI_Post_coll_t gatherv_post;
   TRACE_ERR("%s gatherv\n", MPIDI_Process.context_post.active>0?"Posting":"Invoking");
   MPIDI_Context_post(MPIDI_Context[0], &gatherv_post.state,
                      MPIDI_Pami_post_wrapper, (void *)&gatherv);
   TRACE_ERR("Gatherv %s\n", MPIDI_Process.context_post.active>0?"posted":"invoked");

   TRACE_ERR("Waiting on active %d\n", gatherv_active);
   MPID_PROGRESS_WAIT_WHILE(gatherv_active);

   /* Unpack only when a staging buffer was really used.  Keying this on the
    * contiguity flag could run the copy/free path with rcounts/rdispls still
    * aliasing the caller's arrays. */
   if(rcv_noncontig_buff != NULL)
   {
     if(recvcontinuous)
     {
       MPIR_Localcopy(rcv_noncontig_buff, recv_size, MPI_CHAR,
                      recvbuf,   totalrecvcount,     recvtype);
     }
     else
     {
       size_t extent;
       MPID_Datatype_get_extent_macro(recvtype,extent);
       for(i=0; i<size; ++i)
       {
         char* scbuf = (char*)rcv_noncontig_buff+ rdispls[i];
         char* rcbuf = (char*)recvbuf + displs[i]*extent;
         MPIR_Localcopy(scbuf, rcounts[i], MPI_CHAR,
                        rcbuf, recvcounts[i], recvtype);
         TRACE_ERR("Pack recv src  extent %zu, displ[%zu]=%zu, count[%zu]=%zu buf[%zu]=%u\n",
                   (size_t)extent, (size_t)i,(size_t)rdispls[i],(size_t)i,(size_t)rcounts[i],(size_t)rdispls[i], *(int*)scbuf);
         TRACE_ERR("Pack recv dest extent %zu, displ[%zu]=%zu, count[%zu]=%zu buf[%zu]=%u\n",
                   (size_t)extent, (size_t)i,(size_t)displs[i],(size_t)i,(size_t)recvcounts[i],(size_t)displs[i], *(int*)rcbuf);
       }

     }
     MPL_free(rcv_noncontig_buff);
     if(rank == root)
     {
        MPL_free(rcounts);
        MPL_free(rdispls);
     }
   }
   if(!snd_contig)  MPL_free(snd_noncontig_buff);


   TRACE_ERR("Leaving MPIDO_Gatherv_optimized\n");
   return MPI_SUCCESS;
}
示例#6
0
/*
 * Pack incount elements of datatype from inbuf into outbuf at *position.
 *
 * Contiguous datatypes are copied with a single memcpy; everything else goes
 * through a temporary MPID_Segment.  On success *position is advanced by the
 * number of bytes written.
 *
 * inbuf     - source buffer
 * incount   - number of elements to pack; 0 is a no-op
 * datatype  - datatype of the source elements
 * outbuf    - destination buffer
 * outsize   - size of outbuf in bytes (not checked by this routine)
 * position  - [in/out] byte offset into outbuf
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_Pack_impl(const void *inbuf,
                   int incount,
                   MPI_Datatype datatype,
                   void *outbuf,
                   MPI_Aint outsize,
                   MPI_Aint *position)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint first, last;
    MPID_Segment *segp = NULL;
    int contig;
    MPI_Aint dt_true_lb;
    MPI_Aint data_sz;

    if (incount == 0) {
	goto fn_exit;
    }

    /* Handle contig case quickly */
    if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN) {
        contig     = TRUE;
        dt_true_lb = 0;
        /* Widen before multiplying so that the product is not computed in int. */
        data_sz    = (MPI_Aint) incount * MPID_Datatype_get_basic_size(datatype);
    } else {
        MPID_Datatype *dt_ptr;
        MPID_Datatype_get_ptr(datatype, dt_ptr);
	contig     = dt_ptr->is_contig;
        dt_true_lb = dt_ptr->true_lb;
        data_sz    = (MPI_Aint) incount * dt_ptr->size;
    }

    if (contig) {
        MPIU_Memcpy((char *) outbuf + *position, (char *)inbuf + dt_true_lb, data_sz);
        /* position is an MPI_Aint; do not narrow the new offset through int. */
        *position = *position + data_sz;
        goto fn_exit;
    }
    

    /* non-contig case */
    
    /* TODO: CHECK RETURN VALUES?? */
    /* TODO: SHOULD THIS ALL BE IN A MPID_PACK??? */
    segp = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1(segp == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment");
    
    mpi_errno = MPID_Segment_init(inbuf, incount, datatype, segp, 0);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* NOTE: the use of buffer values and positions in MPI_Pack and in
     * MPID_Segment_pack are quite different.  See code or docs or something.
     */
    first = 0;
    last  = SEGMENT_IGNORE_LAST;

    /* Ensure that pointer increment fits in a pointer */
    MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT outbuf) +
				     (MPI_Aint) *position);

    MPID_Segment_pack(segp,
		      first,
		      &last,
		      (void *) ((char *) outbuf + *position));

    /* Ensure that calculation fits into an int datatype. */
    MPID_Ensure_Aint_fits_in_int((MPI_Aint)*position + last);

    *position = *position + last;

 fn_exit:
    /* Released here so that the error path (e.g. a failing
     * MPID_Segment_init) does not leak the segment. */
    if (segp != NULL) {
        MPID_Segment_free(segp);
    }
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
示例#7
0
/*
 * Scatterv built on the PAMI scatterv_int collective.
 *
 * On the root, if the send datatype cannot be mapped to a PAMI type and is
 * non-contiguous, the send data is packed into a temporary byte buffer and
 * byte counts/displacements are computed for it.  A non-contiguous receive
 * datatype is received into a temporary byte buffer and copied into recvbuf
 * afterwards.
 *
 * The collective-selection advisor, when available, may redirect the call to
 * MPIR_Scatterv() or request a barrier for flow control.
 *
 * Parameters follow MPI_Scatterv; mpierrno is only forwarded to MPIR_Scatterv().
 * Returns MPI_SUCCESS, the result of MPIR_Scatterv() when the call is
 * redirected, or -1 if MPI_IN_PLACE is used without PAMI_IN_PLACE support.
 */
int MPIDO_Scatterv_simple(const void *sendbuf,
                   const int *sendcounts,
                   const int *displs,
                   MPI_Datatype sendtype,
                   void *recvbuf,
                   int recvcount,
                   MPI_Datatype recvtype,
                   int root,
                   MPID_Comm *comm_ptr,
                   int *mpierrno)
{
#ifndef HAVE_PAMI_IN_PLACE
  if (sendbuf == MPI_IN_PLACE)
  {
    MPID_Abort (NULL, 0, 1, "'MPI_IN_PLACE' requries support for `PAMI_IN_PLACE`");
    return -1;
  }
#endif
  int snd_contig = 1;
  int rcv_contig = 1;
  int send_size = 0, recv_size = 0;
  int ssize = 0;
  MPID_Datatype *dt_ptr = NULL;
  MPI_Aint send_true_lb=0, recv_true_lb=0;
  void *snd_noncontig_buff = NULL, *rcv_noncontig_buff = NULL;
  void *sbuf = NULL, *rbuf = NULL;
  int *sdispls = NULL, *scounts = NULL;
  int sndcount  = 0;
  MPID_Segment segment;
  int tmp, i;
  pami_type_t stype = PAMI_TYPE_NULL;
  const int rank = comm_ptr->rank;
  const int size = comm_ptr->local_size;
  const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);

  if (rank == root && sendtype != MPI_DATATYPE_NULL && sendcounts[0] >= 0)
  {
    MPIDI_Datatype_get_info(1, sendtype, snd_contig, ssize, dt_ptr, send_true_lb);
    if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
    {
      advisor_algorithm_t advisor_algorithms[1];
      int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_SCATTERV_INT, 64, advisor_algorithms, 1);
      if(num_algorithms)
      {
        if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
        {
          return MPIR_Scatterv(sendbuf, sendcounts, displs, sendtype,
                             recvbuf, recvcount, recvtype,
                             root, comm_ptr, mpierrno);
        }
        else if(advisor_algorithms[0].metadata && advisor_algorithms[0].metadata->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
        {
          /* Flow control: the request budget is used up, so refill it and
           * synchronise with a barrier. */
          comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
          int tmpmpierrno;
          MPIDO_Barrier(comm_ptr, &tmpmpierrno);
        }

      }
    }
  }

  if (recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
  {
    MPIDI_Datatype_get_info(recvcount, recvtype, rcv_contig,
                            recv_size, dt_ptr, recv_true_lb);
    if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
    {
      advisor_algorithm_t advisor_algorithms[1];
      int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_SCATTERV_INT, 64, advisor_algorithms, 1);
      if(num_algorithms)
      {
        if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
        {
          return MPIR_Scatterv(sendbuf, sendcounts, displs, sendtype,
                             recvbuf, recvcount, recvtype,
                             root, comm_ptr, mpierrno);
        }
        else if(advisor_algorithms[0].metadata && advisor_algorithms[0].metadata->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
        {
          comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
          int tmpmpierrno;
          MPIDO_Barrier(comm_ptr, &tmpmpierrno);
        }

      }
    }
  }

   pami_xfer_t scatterv;
   const pami_metadata_t *my_scatterv_md;
   volatile unsigned scatterv_active = 1;

   sbuf = (char *)sendbuf + send_true_lb;
   rbuf = (char *)recvbuf + recv_true_lb;
   /* By default the caller's count/displacement arrays are used as-is. */
   scounts = (int*)sendcounts;
   sdispls = (int*)displs;
   if(rank == root)
   {
     if(MPIDI_Datatype_to_pami(sendtype, &stype, -1, NULL, &tmp) != MPI_SUCCESS)
     {
       if (!snd_contig)
       {
          /* One int per rank; the element size must be part of the request. */
          scounts = (int*)MPIU_Malloc((size_t) size * sizeof(int));
          sdispls = (int*)MPIU_Malloc((size_t) size * sizeof(int));
          if(scounts == NULL || sdispls == NULL)
          {
             MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
                "Fatal:  Cannot allocate count/displacement buffers");
          }
          /* Convert element counts/displacements into byte units. */
          for(i = 0; i < size; i++)
          {
            scounts[i] = ssize * sendcounts[i];
            sdispls[i] = ssize * displs[i];
            send_size += scounts[i];
            sndcount  += sendcounts[i];
          }
          snd_noncontig_buff = MPIU_Malloc(send_size);
          sbuf = snd_noncontig_buff;
          stype = PAMI_TYPE_BYTE;
          if(snd_noncontig_buff == NULL)
          {
             MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
                "Fatal:  Cannot allocate pack buffer");
          }
          DLOOP_Offset last = send_size;
          MPID_Segment_init(sendbuf, sndcount, sendtype, &segment, 0);
          MPID_Segment_pack(&segment, 0, &last, snd_noncontig_buff);
       }
     }
     if(recvbuf == MPI_IN_PLACE)
     {
       rbuf = PAMI_IN_PLACE;
     }
   }

   if(recvbuf != MPI_IN_PLACE)
   {
     if (!rcv_contig)
     {
       /* Receive as bytes into a staging buffer; unpacked after completion. */
       rcv_noncontig_buff = MPIU_Malloc(recv_size);
       rbuf = rcv_noncontig_buff;
       if(rcv_noncontig_buff == NULL)
       {
          MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
             "Fatal:  Cannot allocate pack buffer");
       }
     }
   }

   scatterv.cb_done = cb_scatterv;
   scatterv.cookie = (void *)&scatterv_active;
   scatterv.cmd.xfer_scatterv_int.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0);

   scatterv.algorithm = mpid->coll_algorithm[PAMI_XFER_SCATTERV_INT][0][0];
   my_scatterv_md = &mpid->coll_metadata[PAMI_XFER_SCATTERV_INT][0][0];
   
   scatterv.cmd.xfer_scatterv_int.rcvbuf = rbuf;
   scatterv.cmd.xfer_scatterv_int.sndbuf = sbuf;
   scatterv.cmd.xfer_scatterv_int.stype = stype;
   scatterv.cmd.xfer_scatterv_int.rtype = PAMI_TYPE_BYTE;/* rtype is ignored when rcvbuf == PAMI_IN_PLACE */
   scatterv.cmd.xfer_scatterv_int.stypecounts = (int *) scounts;
   scatterv.cmd.xfer_scatterv_int.rtypecount = recv_size;
   scatterv.cmd.xfer_scatterv_int.sdispls = (int *) sdispls;


   MPIDI_Update_last_algorithm(comm_ptr, my_scatterv_md->name);


   MPIDI_Post_coll_t scatterv_post;
   TRACE_ERR("%s scatterv\n", MPIDI_Process.context_post.active>0?"Posting":"Invoking");
   MPIDI_Context_post(MPIDI_Context[0], &scatterv_post.state,
                      MPIDI_Pami_post_wrapper, (void *)&scatterv);

   TRACE_ERR("Waiting on active %d\n", scatterv_active);
   MPID_PROGRESS_WAIT_WHILE(scatterv_active);

   if(!rcv_contig)
   {
      MPIR_Localcopy(rcv_noncontig_buff, recv_size, MPI_CHAR,
                        recvbuf,         recvcount,     recvtype);
      MPIU_Free(rcv_noncontig_buff);
   }
   /* Free only what was allocated above.  Keying this on !snd_contig alone
    * would free the caller's sendcounts/displs arrays whenever the datatype
    * was handled natively by PAMI. */
   if(snd_noncontig_buff != NULL)
   {
     MPIU_Free(snd_noncontig_buff);
     MPIU_Free(scounts);
     MPIU_Free(sdispls);
   }

   TRACE_ERR("Leaving MPIDO_Scatterv_optimized\n");
   return MPI_SUCCESS;
}
示例#8
0
int MPID_nem_ib_lmt_switch_send(struct MPIDI_VC *vc, struct MPID_Request *req)
{
    int mpi_errno = MPI_SUCCESS;
    int dt_contig;
    MPIDI_msg_sz_t data_sz;
    MPID_Datatype *dt_ptr;
    MPI_Aint dt_true_lb;
    MPID_IOV r_cookie = req->ch.lmt_tmp_cookie;
    MPID_nem_ib_lmt_cookie_t *r_cookie_buf = r_cookie.iov_base;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_SWITCH_SEND);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_SWITCH_SEND);

    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    void *write_from_buf;
    if (dt_contig) {
        write_from_buf = req->dev.user_buf;
    }
    else {
        /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */
        req->dev.segment_ptr = MPID_Segment_alloc();
        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                            "**outofmemory");

        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype,
                          req->dev.segment_ptr, 0);
        req->dev.segment_first = 0;
        req->dev.segment_size = data_sz;

        MPIDI_msg_sz_t last;
        last = req->dev.segment_size;   /* segment_size is byte offset */
        MPIU_Assert(last > 0);

        REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc(data_sz);
        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
                            "**outofmemory");

        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last,
                          (char *) (REQ_FIELD(req, lmt_pack_buf)));
        MPIU_Assert(last == req->dev.segment_size);

        write_from_buf = REQ_FIELD(req, lmt_pack_buf);
    }

    //assert(dt_true_lb == 0);
    uint8_t *tailp =
        (uint8_t *) ((uint8_t *) write_from_buf /*+ dt_true_lb */  + data_sz - sizeof(uint8_t));
#if 0
    *is_end_flag_same = (r_cookie_buf->tail == *tailp) ? 1 : 0;
#else
    REQ_FIELD(req, lmt_receiver_tail) = r_cookie_buf->tail;
    REQ_FIELD(req, lmt_sender_tail) = *tailp;
    dprintf("lmt_switch_send,tail on sender=%02x,tail onreceiver=%02x,req=%p\n", *tailp,
            r_cookie_buf->tail, req);
#ifdef MPID_NEM_IB_DEBUG_LMT
    uint8_t *tail_wordp = (uint8_t *) ((uint8_t *) write_from_buf + data_sz - sizeof(uint32_t) * 2);
#endif
    dprintf("lmt_switch_send,tail on sender=%d\n", *tail_wordp);
    fflush(stdout);
#endif

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_SWITCH_SEND);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
示例#9
0
/*
 * MPIDO_Allgather_simple - allgather carried out as one byte-typed PAMI
 * collective, with a fallback to MPIR_Allgather when the collective-selection
 * advisor asks for an external algorithm.
 *
 * Non-contiguous send data is packed into a temporary buffer before the
 * collective; a non-contiguous receive type is served from a temporary
 * buffer of recv_size * size bytes that is unpacked into recvbuf afterwards.
 *
 * sendbuf/sendcount/sendtype  local contribution (sendbuf may be MPI_IN_PLACE)
 * recvbuf/recvcount/recvtype  receive buffer; recvcount is the per-rank count
 * comm_ptr                    communicator
 * mpierrno                    forwarded to MPIR_Allgather on the fallback path
 *
 * Returns MPI_SUCCESS (or the MPIR_Allgather result on the fallback path).
 */
int
MPIDO_Allgather_simple(const void *sendbuf,
                int sendcount,
                MPI_Datatype sendtype,
                void *recvbuf,
                int recvcount,
                MPI_Datatype recvtype,
                MPID_Comm * comm_ptr,
                int *mpierrno)
{
#ifndef HAVE_PAMI_IN_PLACE
  if (sendbuf == MPI_IN_PLACE)
  {
    MPID_Abort (NULL, 0, 1, "'MPI_IN_PLACE' requries support for `PAMI_IN_PLACE`");
    return -1;
  }
#endif
     /* *********************************
   * Check the nature of the buffers
   * *********************************
   */
   const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
   MPID_Datatype * dt_null = NULL;
   void *snd_noncontig_buff = NULL, *rcv_noncontig_buff = NULL;
   MPI_Aint send_true_lb = 0;
   MPI_Aint recv_true_lb = 0;
   int snd_data_contig = 1, rcv_data_contig = 1;
   size_t send_size = 0;
   size_t recv_size = 0;
   MPID_Segment segment;
   volatile unsigned allgather_active = 1;
   const int rank = comm_ptr->rank;
   const int size = comm_ptr->local_size;
#if ASSERT_LEVEL==0
   /* We can't afford the tracing in ndebug/performance libraries */
    const unsigned verbose = 0;
#else
    const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0);
#endif

   const pami_metadata_t *my_md;

   char *rbuf = NULL, *sbuf = NULL;


   /* Nothing to exchange */
   if ((sendcount < 1 && sendbuf != MPI_IN_PLACE) || recvcount < 1)
      return MPI_SUCCESS;

   /* Gather datatype information */
   MPIDI_Datatype_get_info(recvcount,
			  recvtype,
			  rcv_data_contig,
			  recv_size,
			  dt_null,
			  recv_true_lb);

   /* Default for the in-place case; overwritten below when a send buffer exists */
   send_size = recv_size;

  if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
  {
    advisor_algorithm_t advisor_algorithms[1];
    int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_ALLGATHER, send_size, advisor_algorithms, 1);
    if(num_algorithms)
    {
      if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
      {
        return MPIR_Allgather(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              comm_ptr, mpierrno); 
      }
      else if(advisor_algorithms[0].metadata && advisor_algorithms[0].metadata->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
      {
        comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
        int tmpmpierrno;
        if(unlikely(verbose))
          fprintf(stderr,"Query barrier required for %s\n", advisor_algorithms[0].metadata->name);
        MPIDO_Barrier(comm_ptr, &tmpmpierrno);
      }
    }
  }

   rbuf = (char *)recvbuf+recv_true_lb;

  if(!rcv_data_contig)
  {
    /* Contiguous staging area for the blocks of all ranks */
    rcv_noncontig_buff = MPL_malloc(recv_size * size);
    rbuf = rcv_noncontig_buff;
    if(rcv_noncontig_buff == NULL)
    {
      MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
                 "Fatal:  Cannot allocate pack buffer");
    }
    if(sendbuf == MPI_IN_PLACE)
    {
      sbuf = PAMI_IN_PLACE;
      size_t extent;
      MPID_Datatype_get_extent_macro(recvtype,extent);
      /* In-place: copy this rank's own block from recvbuf into its slot of
       * the staging area.  Offsets are computed on char pointers (arithmetic
       * on void * is a compiler extension) and in size_t. */
      MPIR_Localcopy((char *)recvbuf + ((size_t)rank * (size_t)recvcount * extent),
                     recvcount, recvtype,
                     (char *)rcv_noncontig_buff + ((size_t)rank * recv_size),
                     recv_size, MPI_CHAR);
    }
  }

  if(sendbuf != MPI_IN_PLACE)
   {
     MPIDI_Datatype_get_info(sendcount,
                           sendtype,
                           snd_data_contig,
                           send_size,
                           dt_null,
                           send_true_lb);

     sbuf = (char *)sendbuf+send_true_lb;

     if(!snd_data_contig)
     {
        /* Pack the non-contiguous contribution into a contiguous buffer */
        snd_noncontig_buff = MPL_malloc(send_size);
        sbuf = snd_noncontig_buff;
        if(snd_noncontig_buff == NULL)
        {
           MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
              "Fatal:  Cannot allocate pack buffer");
        }
        DLOOP_Offset last = send_size;
        MPID_Segment_init(sendbuf, sendcount, sendtype, &segment, 0);
        MPID_Segment_pack(&segment, 0, &last, snd_noncontig_buff);
     }
  }
  else
    sbuf = PAMI_IN_PLACE;

   TRACE_ERR("Using PAMI-level allgather protocol\n");
   pami_xfer_t allgather;
   allgather.cb_done = allgather_cb_done;
   allgather.cookie = (void *)&allgather_active;
   allgather.cmd.xfer_allgather.rcvbuf = rbuf;
   allgather.cmd.xfer_allgather.sndbuf = sbuf;
   allgather.cmd.xfer_allgather.stype = PAMI_TYPE_BYTE;/* stype is ignored when sndbuf == PAMI_IN_PLACE */
   allgather.cmd.xfer_allgather.rtype = PAMI_TYPE_BYTE;
   allgather.cmd.xfer_allgather.stypecount = send_size;
   allgather.cmd.xfer_allgather.rtypecount = recv_size;
   allgather.algorithm = mpid->coll_algorithm[PAMI_XFER_ALLGATHER][0][0];
   my_md = &mpid->coll_metadata[PAMI_XFER_ALLGATHER][0][0];

   TRACE_ERR("Calling PAMI_Collective with allgather structure\n");
   MPIDI_Post_coll_t allgather_post;
   MPIDI_Context_post(MPIDI_Context[0], &allgather_post.state, MPIDI_Pami_post_wrapper, (void *)&allgather);
   TRACE_ERR("Allgather %s\n", MPIDI_Process.context_post.active>0?"posted":"invoked");

   MPIDI_Update_last_algorithm(comm_ptr, my_md->name);
   /* allgather_active is handed to the collective as its cookie; wait until it drops to zero */
   MPID_PROGRESS_WAIT_WHILE(allgather_active);
   if(!rcv_data_contig)
   {
      /* The staging area holds one block per rank, so the destination must be
       * described as recvcount * size elements; passing only recvcount would
       * unpack just the first rank's block. */
      MPIR_Localcopy(rcv_noncontig_buff, recv_size * size, MPI_CHAR,
                        recvbuf, (MPI_Aint)recvcount * size, recvtype);
      MPL_free(rcv_noncontig_buff);
   }
   if(!snd_data_contig)  MPL_free(snd_noncontig_buff);
   TRACE_ERR("Allgather done\n");
   return MPI_SUCCESS;
}
示例#10
0
/*
 * MPIDO_Bcast - broadcast `count` elements of `datatype` from `root` using a
 * PAMI broadcast protocol, falling back to MPIR_Bcast_intra when MPICH
 * algorithms are selected, the message is empty, the FCA cutoff is exceeded,
 * or the protocol's metadata query rejects the call.
 *
 * Messages larger than BCAST_LIMIT bytes are split into several recursive
 * broadcasts.  Non-contiguous data is packed into a temporary buffer on the
 * root and unpacked from it on the other ranks.
 *
 * buffer/count/datatype  data to broadcast (in on root, out elsewhere)
 * root                   rank of the broadcasting process in comm_ptr
 * comm_ptr               communicator
 * mpierrno               forwarded to MPIR_Bcast_intra on the fallback paths
 *
 * Returns 0 (MPI_SUCCESS) or the result of the fallback / recursive call.
 */
int MPIDO_Bcast(void *buffer,
                int count,
                MPI_Datatype datatype,
                int root,
                MPID_Comm *comm_ptr,
                int *mpierrno)
{
   TRACE_ERR("in mpido_bcast\n");
   const size_t BCAST_LIMIT =      0x40000000;
   int data_contig, rc;
   void *data_buffer    = NULL,
        *noncontig_buff = NULL;
   volatile unsigned active = 1;
   MPI_Aint data_true_lb = 0;
   MPID_Datatype *data_ptr;
   MPID_Segment segment;
   MPIDI_Post_coll_t bcast_post;
   const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
   const int rank = comm_ptr->rank;
#if ASSERT_LEVEL==0
   /* We can't afford the tracing in ndebug/performance libraries */
    const unsigned verbose = 0;
#else
   const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0);
#endif
   const int selected_type = mpid->user_selected_type[PAMI_XFER_BROADCAST];

   /* Must calculate data_size based on count=1 in case it's total size is > integer */
   int data_size_one;
   MPIDI_Datatype_get_info(1, datatype,
			   data_contig, data_size_one, data_ptr, data_true_lb);
   /* do this calculation once and use twice */
   const size_t data_size_sz = (size_t)data_size_one*(size_t)count;
   if(unlikely(verbose))
     fprintf(stderr,"bcast count %d, size %d (%#zX), root %d, buffer %p\n",
	     count,data_size_one, data_size_sz, root,buffer);
   if(unlikely( data_size_sz > BCAST_LIMIT) )
   {
      /* Split the message into chunks of at most BCAST_LIMIT bytes.
       * NOTE(review): the chunk pointer advances by the datatype size, not
       * its extent - confirm this is correct for non-contiguous datatypes. */
      void *new_buffer=buffer;
      int c, new_count = (int)BCAST_LIMIT/data_size_one;
      MPID_assert(new_count > 0);

      for(c=1; ((size_t)c*(size_t)new_count) <= (size_t)count; ++c)
      {
        if ((rc = MPIDO_Bcast(new_buffer,
                        new_count,
                        datatype,
                        root,
                        comm_ptr,
                              mpierrno)) != MPI_SUCCESS)
         return rc;
	 new_buffer = (char*)new_buffer + (size_t)data_size_one*(size_t)new_count;
      }
      new_count = count % new_count; /* 0 is ok, just returns no-op */
      return MPIDO_Bcast(new_buffer,
                         new_count,
                         datatype,
                         root,
                         comm_ptr,
                         mpierrno);
   }

   /* Must use data_size based on count for byte bcast processing.
      Previously calculated as a size_t but large data_sizes were 
      handled above so this cast to int should be fine here.  
   */
   const int data_size = (int)data_size_sz;

   if(selected_type == MPID_COLL_USE_MPICH || data_size == 0)
   {
     if(unlikely(verbose))
       fprintf(stderr,"Using MPICH bcast algorithm\n");
      MPIDI_Update_last_algorithm(comm_ptr,"BCAST_MPICH");
      return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
   }

   data_buffer = (char *)buffer + data_true_lb;

   if(!data_contig)
   {
      /* Broadcast a packed copy; only the root has data to pack */
      noncontig_buff = MPIU_Malloc(data_size);
      data_buffer = noncontig_buff;
      if(noncontig_buff == NULL)
      {
         MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
            "Fatal:  Cannot allocate pack buffer");
      }
      if(rank == root)
      {
         DLOOP_Offset last = data_size;
         MPID_Segment_init(buffer, count, datatype, &segment, 0);
         MPID_Segment_pack(&segment, 0, &last, noncontig_buff);
      }
   }

   pami_xfer_t bcast;
   pami_algorithm_t my_bcast;
   const pami_metadata_t *my_md = (pami_metadata_t *)NULL;
   int queryreq = 0;

   bcast.cb_done = cb_bcast;
   bcast.cookie = (void *)&active;
   bcast.cmd.xfer_broadcast.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0);
   bcast.algorithm = mpid->user_selected[PAMI_XFER_BROADCAST];
   bcast.cmd.xfer_broadcast.buf = data_buffer;
   bcast.cmd.xfer_broadcast.type = PAMI_TYPE_BYTE;
   /* Needs to be sizeof(type)*count since we are using bytes as * the generic type */
   bcast.cmd.xfer_broadcast.typecount = data_size;

   if(selected_type == MPID_COLL_OPTIMIZED)
   {
      TRACE_ERR("Optimized bcast (%s) and (%s) were pre-selected\n",
         mpid->opt_protocol_md[PAMI_XFER_BROADCAST][0].name,
         mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1].name);

      if(mpid->cutoff_size[PAMI_XFER_BROADCAST][1] != 0)/* SSS: There is FCA cutoff (FCA only sets cutoff for [PAMI_XFER_BROADCAST][1]) */
      {
        if(data_size <= mpid->cutoff_size[PAMI_XFER_BROADCAST][1])
        {
          /* NOTE(review): the cutoff_size[..][0] test below reassigns these
           * three values unconditionally - confirm that is intended. */
          my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][1];
          my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1];
          queryreq = mpid->must_query[PAMI_XFER_BROADCAST][1];
        }
        else
        {
          /* Falling back to MPICH: the pack buffer is not needed any more,
           * release it so this early return does not leak it. */
          if(!data_contig)
             MPIU_Free(noncontig_buff);
          return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
        }
      }

      if(data_size > mpid->cutoff_size[PAMI_XFER_BROADCAST][0])
      {
         my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][1];
         my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1];
         queryreq = mpid->must_query[PAMI_XFER_BROADCAST][1];
      }
      else
      {
         my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][0];
         my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][0];
         queryreq = mpid->must_query[PAMI_XFER_BROADCAST][0];
      }
   }
   else
   {
      TRACE_ERR("Bcast (%s) was specified by user\n",
         mpid->user_metadata[PAMI_XFER_BROADCAST].name);
      my_bcast =  mpid->user_selected[PAMI_XFER_BROADCAST];
      my_md = &mpid->user_metadata[PAMI_XFER_BROADCAST];
      queryreq = selected_type;
   }

   bcast.algorithm = my_bcast;

   if(unlikely(queryreq == MPID_COLL_ALWAYS_QUERY ||
               queryreq == MPID_COLL_CHECK_FN_REQUIRED))
   {
      metadata_result_t result = {0};
      TRACE_ERR("querying bcast protocol %s, type was: %d\n",
                my_md->name, queryreq);
      if(my_md->check_fn != NULL) /* calling the check fn is sufficient */
      {
         /* Store the verdict in the `result` tested below.  A second local
          * with the same name used to be declared here, which hid the
          * check_fn outcome from the bitmask test. */
         result = my_md->check_fn(&bcast);
         result.check.nonlocal = 0; /* #warning REMOVE THIS WHEN IMPLEMENTED */
      } 
      else /* no check_fn, manually look at the metadata fields */
      {
         TRACE_ERR("Optimzed selection line %d\n",__LINE__);
         /* Check if the message range if restricted */
         if(my_md->check_correct.values.rangeminmax)
         {
            if((my_md->range_lo <= data_size) &&
               (my_md->range_hi >= data_size))
               ; /* ok, algorithm selected */
            else
            {
               result.check.range = 1;
               if(unlikely(verbose))
               {   
                  fprintf(stderr,"message size (%u) outside range (%zu<->%zu) for %s.\n",
                          data_size,
                          my_md->range_lo,
                          my_md->range_hi,
                          my_md->name);
               }
            }
         }
         /* \todo check the rest of the metadata */
      }
      TRACE_ERR("bitmask: %#X\n", result.bitmask);
      if(result.bitmask)
      {
         if(unlikely(verbose))
            fprintf(stderr,"Using MPICH bcast algorithm - query fn failed\n");
         MPIDI_Update_last_algorithm(comm_ptr,"BCAST_MPICH");
         /* Release the pack buffer before handing over to MPICH */
         if(!data_contig)
            MPIU_Free(noncontig_buff);
         return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
      }
      if(my_md->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests))) 
      { 
         comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
         int tmpmpierrno;   
         if(unlikely(verbose))
            fprintf(stderr,"Query barrier required for %s\n", my_md->name);
         MPIDO_Barrier(comm_ptr, &tmpmpierrno);
      }
   }

   if(unlikely(verbose))
   {
      unsigned long long int threadID;
      MPIU_Thread_id_t tid;
      MPIU_Thread_self(&tid);
      threadID = (unsigned long long int)tid;
      fprintf(stderr,"<%llx> Using protocol %s for bcast on %u\n", 
              threadID,
              my_md->name,
              (unsigned) comm_ptr->context_id);
   }

   MPIDI_Context_post(MPIDI_Context[0], &bcast_post.state, MPIDI_Pami_post_wrapper, (void *)&bcast);
   MPIDI_Update_last_algorithm(comm_ptr, my_md->name);
   /* `active` is handed to the collective as its cookie; wait until it drops to zero */
   MPID_PROGRESS_WAIT_WHILE(active);
   TRACE_ERR("bcast done\n");

   if(!data_contig)
   {
      /* Non-root ranks unpack the received bytes into the user's layout */
      if(rank != root)
         MPIR_Localcopy(noncontig_buff, data_size, MPI_CHAR,
                        buffer,         count,     datatype);
      MPIU_Free(noncontig_buff);
   }

   TRACE_ERR("leaving bcast\n");
   return 0;
}
示例#11
0
/*
 * MPID_nem_ib_lmt_initiate_lmt - sender side start of a large-message
 * transfer: build the cookie describing the send buffer and send it to the
 * peer as the payload of the RTS packet.
 *
 * The cookie is heap-allocated (it must outlive this call) and remembered in
 * req->ch.s_cookie.  Non-contiguous data is packed into a buffer stored in
 * the request's lmt_pack_buf field.  The memory region covering the first
 * segment is fetched from the registration cache and its rkey placed in the
 * cookie.
 *
 * vc       virtual connection to the receiver
 * rts_pkt  RTS packet used as the header of the message
 * req      send request describing the user buffer
 *
 * Returns MPI_SUCCESS, or an MPI error code on allocation or registration
 * failure.
 */
int MPID_nem_ib_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt,
                                 struct MPID_Request *req)
{
    int mpi_errno = MPI_SUCCESS;
    int dt_contig;
    MPIDI_msg_sz_t data_sz;
    MPID_Datatype *dt_ptr;
    MPI_Aint dt_true_lb;
#if 0
    MPID_nem_ib_vc_area *vc_ib = VC_IB(vc);
#endif

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT);

    dprintf("lmt_initiate_lmt,enter,%d->%d,req=%p\n", MPID_nem_ib_myrank, vc->pg_rank, req);

    /* obtain dt_true_lb */
    /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */
    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    /* FIXME: who frees s_cookie_buf? */
    /* malloc memory area for cookie. auto variable is NG because isend does not copy payload */
    MPID_nem_ib_lmt_cookie_t *s_cookie_buf =
        (MPID_nem_ib_lmt_cookie_t *) MPIU_Malloc(sizeof(MPID_nem_ib_lmt_cookie_t));
    /* The cookie is written to unconditionally below; fail cleanly instead of
     * dereferencing NULL when the allocation fails. */
    MPIU_ERR_CHKANDJUMP(!s_cookie_buf, mpi_errno, MPI_ERR_OTHER, "**outofmemory");

    /* remember address to "free" when receiving DONE from receiver */
    req->ch.s_cookie = s_cookie_buf;

    /* see MPIDI_CH3_PktHandler_RndvClrToSend (in src/mpid/ch3/src/ch3u_rndv.c) */
    //assert(dt_true_lb == 0);
    void *write_from_buf;
    if (dt_contig) {
        write_from_buf = (void *) ((char *) req->dev.user_buf + dt_true_lb);
    }
    else {
        /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */
        req->dev.segment_ptr = MPID_Segment_alloc();
        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                            "**outofmemory");

        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype,
                          req->dev.segment_ptr, 0);
        req->dev.segment_first = 0;
        req->dev.segment_size = data_sz;

        MPIDI_msg_sz_t last;
        last = req->dev.segment_size;   /* segment_size is byte offset */
        MPIU_Assert(last > 0);
        REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t) req->dev.segment_size);
        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
                            "**outofmemory");
        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last,
                          (char *) (REQ_FIELD(req, lmt_pack_buf)));
        MPIU_Assert(last == req->dev.segment_size);
        write_from_buf = REQ_FIELD(req, lmt_pack_buf);
    }
    dprintf
        ("lmt_initate_lmt,dt_contig=%d,write_from_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n",
         dt_contig, write_from_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf));

#ifndef HAVE_LIBDCFA
    /* With libdcfa the address is taken from the memory region further down */
    s_cookie_buf->addr = write_from_buf;
#endif
    /* put sz, see MPID_nem_lmt_RndvSend (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) */
    /* TODO remove sz field
     *   pkt_RTS_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c)
     * rreq->ch.lmt_data_sz = rts_pkt->data_sz; */
    //s_cookie_buf->sz = (uint32_t)((MPID_nem_pkt_lmt_rts_t*)rts_pkt)->data_sz;

    /* preserve and put tail, because tail magic is written on the tail of payload
     * because we don't want to add another SGE or RDMA command */
    MPIU_Assert(((MPID_nem_pkt_lmt_rts_t *) rts_pkt)->data_sz == data_sz);
    s_cookie_buf->tail = *((uint8_t *) ((uint8_t *) write_from_buf + data_sz - sizeof(uint8_t)));
    /* prepare magic */
    //*((uint32_t*)(write_from_buf + data_sz - sizeof(tailmagic_t))) = MPID_NEM_IB_COM_MAGIC;

#if 0   /* moving to packet header */   /* embed RDMA-write-to buffer occupancy information */
    dprintf("lmt_initiate_lmt,rsr_seq_num_tail=%d\n", vc_ib->ibcom->rsr_seq_num_tail);
    /* embed RDMA-write-to buffer occupancy information */
    s_cookie_buf->seq_num_tail = vc_ib->ibcom->rsr_seq_num_tail;

    /* remember the last one sent */
    vc_ib->ibcom->rsr_seq_num_tail_last_sent = vc_ib->ibcom->rsr_seq_num_tail;
#endif

    int post_num;
    uint32_t max_msg_sz;
    MPID_nem_ib_vc_area *vc_ib = VC_IB(vc);
    MPID_nem_ib_com_get_info_conn(vc_ib->sc->fd, MPID_NEM_IB_COM_INFOKEY_PATTR_MAX_MSG_SZ,
                                  &max_msg_sz, sizeof(uint32_t));

    /* Type of max_msg_sz is uint32_t. */
    /* Number of segments: data_sz divided by max_msg_sz, rounded up */
    post_num = (data_sz + (long) max_msg_sz - 1) / (long) max_msg_sz;

    s_cookie_buf->max_msg_sz = max_msg_sz;
    s_cookie_buf->seg_seq_num = 1;
    s_cookie_buf->seg_num = post_num;

    REQ_FIELD(req, buf.from) = write_from_buf;
    REQ_FIELD(req, data_sz) = data_sz;
    REQ_FIELD(req, seg_seq_num) = 1;    // only send 1st-segment, even if there are some segments.
    REQ_FIELD(req, seg_num) = post_num;
    REQ_FIELD(req, max_msg_sz) = max_msg_sz;

    /* Length of the first segment, which is the one registered below */
    long length;
    if (post_num > 1) {
        length = max_msg_sz;
    }
    else {
        length = data_sz;
    }
    /* put IB rkey */
    struct MPID_nem_ib_com_reg_mr_cache_entry_t *mr_cache =
        MPID_nem_ib_com_reg_mr_fetch(write_from_buf, length, 0, MPID_NEM_IB_COM_REG_MR_GLOBAL);
    MPIU_ERR_CHKANDJUMP(!mr_cache, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_ib_com_reg_mr_fetch");
    struct ibv_mr *mr = mr_cache->mr;
    REQ_FIELD(req, lmt_mr_cache) = (void *) mr_cache;
#ifdef HAVE_LIBDCFA
    s_cookie_buf->addr = (void *) mr->host_addr;
    dprintf("lmt_initiate_lmt,s_cookie_buf->addr=%p\n", s_cookie_buf->addr);
#endif
    s_cookie_buf->rkey = mr->rkey;
    /* Pointer arithmetic is done on uint8_t *; arithmetic on void * is a compiler extension */
    dprintf("lmt_initiate_lmt,tail=%02x,mem-tail=%p,%02x,sz=%ld,raddr=%p,rkey=%08x\n",
            s_cookie_buf->tail,
            (void *) ((uint8_t *) write_from_buf + data_sz - sizeof(uint8_t)),
            *((uint8_t *) write_from_buf + data_sz - sizeof(uint8_t)), data_sz,
            s_cookie_buf->addr, s_cookie_buf->rkey);
    /* send cookie. rts_pkt as the MPI-header, s_cookie_buf as the payload */
    MPID_nem_lmt_send_RTS(vc, (MPID_nem_pkt_lmt_rts_t *) rts_pkt, s_cookie_buf,
                          sizeof(MPID_nem_ib_lmt_cookie_t));

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
示例#12
0
/*
 * MPIDO_Bcast_simple - broadcast `count` elements of `datatype` from `root`
 * with the communicator's first PAMI broadcast algorithm, unless the
 * collective-selection advisor asks for an external algorithm, in which case
 * MPIR_Bcast_intra is used.
 *
 * Non-contiguous data is packed into a temporary buffer on the root and
 * unpacked from it on the other ranks.
 *
 * buffer/count/datatype  data to broadcast (in on root, out elsewhere)
 * root                   rank of the broadcasting process in comm_ptr
 * comm_ptr               communicator
 * mpierrno               forwarded to MPIR_Bcast_intra on the fallback path
 *
 * Returns 0, or the MPIR_Bcast_intra result on the fallback path.
 */
int MPIDO_Bcast_simple(void *buffer,
                int count,
                MPI_Datatype datatype,
                int root,
                MPID_Comm *comm_ptr,
                int *mpierrno)
{
   TRACE_ERR("Entering MPIDO_Bcast_optimized\n");

   int data_contig;
   void *data_buffer    = NULL,
        *noncontig_buff = NULL;
   volatile unsigned active = 1;
   MPI_Aint data_true_lb = 0;
   MPID_Datatype *data_ptr;
   MPID_Segment segment;
   MPIDI_Post_coll_t bcast_post;
   const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
   const int rank = comm_ptr->rank;

   /* Must calculate data_size based on count=1 in case it's total size is > integer */
   int data_size_one;
   MPIDI_Datatype_get_info(1, datatype,
			   data_contig, data_size_one, data_ptr, data_true_lb);
   /* Total message size, computed once in size_t so the product cannot
    * overflow a signed int before it reaches the advisor. */
   const size_t data_size_sz = (size_t)data_size_one*(size_t)count;
   if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
   {
     advisor_algorithm_t advisor_algorithms[1];
     int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_BROADCAST, data_size_sz, advisor_algorithms, 1);
     if(num_algorithms)
     {
       if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
       {
         return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
       }
     }
   }

   /* NOTE(review): unlike MPIDO_Bcast, messages whose size does not fit in
    * an int are not split here - confirm callers never pass such sizes. */
   const int data_size = (int)data_size_sz;

   data_buffer = (char *)buffer + data_true_lb;

   if(!data_contig)
   {
      /* Broadcast a packed copy; only the root has data to pack */
      noncontig_buff = MPIU_Malloc(data_size);
      data_buffer = noncontig_buff;
      if(noncontig_buff == NULL)
      {
         MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
            "Fatal:  Cannot allocate pack buffer");
      }
      if(rank == root)
      {
         DLOOP_Offset last = data_size;
         MPID_Segment_init(buffer, count, datatype, &segment, 0);
         MPID_Segment_pack(&segment, 0, &last, noncontig_buff);
      }
   }

   pami_xfer_t bcast;
   const pami_metadata_t *my_bcast_md;

   bcast.cb_done = cb_bcast;
   bcast.cookie = (void *)&active;
   bcast.cmd.xfer_broadcast.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0);
   bcast.algorithm = mpid->coll_algorithm[PAMI_XFER_BROADCAST][0][0];
   bcast.cmd.xfer_broadcast.buf = data_buffer;
   bcast.cmd.xfer_broadcast.type = PAMI_TYPE_BYTE;
   /* Needs to be sizeof(type)*count since we are using bytes as * the generic type */
   bcast.cmd.xfer_broadcast.typecount = data_size;
   my_bcast_md = &mpid->coll_metadata[PAMI_XFER_BROADCAST][0][0];

   MPIDI_Context_post(MPIDI_Context[0], &bcast_post.state, MPIDI_Pami_post_wrapper, (void *)&bcast);
   MPIDI_Update_last_algorithm(comm_ptr, my_bcast_md->name);
   /* `active` is handed to the collective as its cookie; wait until it drops to zero */
   MPID_PROGRESS_WAIT_WHILE(active);
   TRACE_ERR("bcast done\n");

   if(!data_contig)
   {
      /* Non-root ranks unpack the received bytes into the user's layout */
      if(rank != root)
         MPIR_Localcopy(noncontig_buff, data_size, MPI_CHAR,
                        buffer,         count,     datatype);
      MPIU_Free(noncontig_buff);
   }

   TRACE_ERR("Exiting MPIDO_Bcast_optimized\n");
   return 0;
}
示例#13
0
/*
 * MPIR_Localcopy - copy the data described by (sendbuf, sendcount, sendtype)
 * into the buffer described by (recvbuf, recvcount, recvtype).
 *
 * The amount copied is the send-side data size.  When HAVE_ERROR_CHECKING is
 * defined and the send side is larger than the receive side, the copy is
 * limited to the receive-side size and mpi_errno is set through
 * MPIU_ERR_SET2 with MPI_ERR_TRUNCATE.
 *
 * Four strategies are used depending on the datatypes: a plain memcpy for
 * two builtin types or two contiguous types, a direct unpack or pack when
 * exactly one side is contiguous, and a pack/unpack loop through a temporary
 * buffer when neither side is contiguous.
 *
 * Returns mpi_errno: MPI_SUCCESS unless one of the error macros set it.
 */
int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
                   void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype)
{
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
    MPIU_CHKLMEM_DECL(1);
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY);

    MPID_Datatype_get_size_macro(sendtype, sendsize);
    MPID_Datatype_get_size_macro(recvtype, recvsize);

    /* Total data size on each side: per-element size times element count */
    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;

    /* if there is no data to copy, bail out */
    if (!sdata_sz || !rdata_sz)
        goto fn_exit;

#if defined(HAVE_ERROR_CHECKING)
    if (sdata_sz > rdata_sz) {
        /* More data than the receive side can hold: record the error and copy only what fits */
        MPIU_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz);
        copy_sz = rdata_sz;
    }
    else
#endif /* HAVE_ERROR_CHECKING */
        copy_sz = sdata_sz;

    /* Builtin types is the common case; optimize for it */
    if ((HANDLE_GET_KIND(sendtype) == HANDLE_KIND_BUILTIN) &&
        HANDLE_GET_KIND(recvtype) == HANDLE_KIND_BUILTIN) {
        MPIU_Memcpy(recvbuf, sendbuf, copy_sz);
        goto fn_exit;
    }

    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

    /* Only the true lower bounds are used below; true_extent is a scratch output shared by both calls */
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);

    if (sendtype_iscontig && recvtype_iscontig)
    {
        /* Both sides contiguous: a single memcpy, each buffer offset by its type's true lower bound */
#if defined(HAVE_ERROR_CHECKING)
        MPIU_ERR_CHKMEMCPYANDJUMP(mpi_errno,
                                  ((char *)recvbuf + recvtype_true_lb),
                                  ((char *)sendbuf + sendtype_true_lb),
                                  copy_sz);
#endif
        MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb),
               ((char *) sendbuf + sendtype_true_lb),
               copy_sz);
    }
    else if (sendtype_iscontig)
    {
        /* Only the send side is contiguous: unpack straight from the send buffer through a segment built on the receive side */
        MPID_Segment seg;
	MPI_Aint last;

	MPID_Segment_init(recvbuf, recvcount, recvtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_unpack(&seg, 0, &last, (char*)sendbuf + sendtype_true_lb);
        /* `last` is passed by address; a value other than copy_sz afterwards is reported as a datatype mismatch */
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else if (recvtype_iscontig)
    {
        /* Only the receive side is contiguous: pack straight into the receive buffer through a segment built on the send side */
        MPID_Segment seg;
	MPI_Aint last;

	MPID_Segment_init(sendbuf, sendcount, sendtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_pack(&seg, 0, &last, (char*)recvbuf + recvtype_true_lb);
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else
    {
	/* Neither side is contiguous: move the data in pieces through a
	 * temporary buffer of COPY_BUFFER_SZ bytes, packing from the send
	 * side and unpacking into the receive side. */
	char * buf;
	MPIDI_msg_sz_t buf_off;
	MPID_Segment sseg;
	MPIDI_msg_sz_t sfirst;
	MPID_Segment rseg;
	MPIDI_msg_sz_t rfirst;

        MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");

	MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0);
	MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0);

	/* sfirst / rfirst: byte positions reached so far on the send / receive
	 * side; buf_off: bytes carried over at the front of buf from the
	 * previous iteration. */
	sfirst = 0;
	rfirst = 0;
	buf_off = 0;
	
	while (1)
	{
	    MPI_Aint last;
	    char * buf_end;

	    /* Ask for as much as fits in the free part of buf, or for the rest of the data */
	    if (copy_sz - sfirst > COPY_BUFFER_SZ - buf_off)
	    {
		last = sfirst + (COPY_BUFFER_SZ - buf_off);
	    }
	    else
	    {
		last = copy_sz;
	    }
	    
	    MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
	    MPIU_Assert(last > sfirst);
	    
	    /* buf_end: one past the last valid byte now held in buf */
	    buf_end = buf + buf_off + (last - sfirst);
	    sfirst = last;
	    
	    /* Unpack from the start of buf; `last` is read back as the new receive-side position */
	    MPID_Segment_unpack(&rseg, rfirst, &last, buf);
	    MPIU_Assert(last > rfirst);

	    rfirst = last;

	    if (rfirst == copy_sz)
	    {
		/* successful completion */
		break;
	    }

            /* if the send side finished, but the recv side couldn't unpack it, there's a datatype mismatch */
            MPIU_ERR_CHKANDJUMP(sfirst == copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");        

            /* if not all data was unpacked, copy it to the front of the buffer for next time */
	    buf_off = sfirst - rfirst;
	    if (buf_off > 0)
	    {
		memmove(buf, buf_end - buf_off, buf_off);
	    }
	}
    }
    
    
  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY);
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}
示例#14
0
/*
 * MPID_nem_mxm_SendNoncontig - send a request whose payload is described by a
 * datatype segment.
 *
 * The packet header is copied into the request, the part of the segment from
 * segment_first to segment_size is packed into a newly allocated buffer
 * (sreq->dev.tmpbuf), and header, optional extended header and packed data
 * are handed to _mxm_isend as a list of buffers.
 *
 * vc      destination virtual connection
 * sreq    send request carrying the segment to pack
 * hdr     packet header; sizeof(MPIDI_CH3_Pkt_t) bytes are copied from it
 * hdr_sz  header size, asserted not to exceed sizeof(MPIDI_CH3_Pkt_t)
 *
 * Returns MPI_SUCCESS or the error code produced by _mxm_isend.
 */
int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
                               MPIDI_msg_sz_t hdr_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t last;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "MPID_nem_mxm_iSendNoncontig");

    /* Keep a private copy of the header inside the request */
    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));

    _dbg_mxm_output(5,
                    "SendNoncontig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t),
                    sreq->dev.segment_size - sreq->dev.segment_first);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;

    /* Entry 0 is always the packet header */
    req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);
    req_area->iov_count = 1;

    /* Entry 1 is the extended header, when the request has one */
    if (sreq->dev.ext_hdr_ptr != NULL) {
        req_area->iov_buf[1].ptr = (void *) (sreq->dev.ext_hdr_ptr);
        req_area->iov_buf[1].length = sreq->dev.ext_hdr_sz;
        req_area->iov_count = 2;
    }

    last = sreq->dev.segment_size;

    /* The upper layer is expected to hand over only packets that carry data,
     * i.e. neither "last <= 0" nor "last - segment_first <= 0".  Should that
     * ever change, the condition of the IF below has to be revisited. */
    MPIU_Assert(last > 0 && last - sreq->dev.segment_first > 0);

    if (last > 0) {
        /* Pack the remaining part of the segment into one contiguous buffer.
         * NOTE(review): the allocation result is only checked by an
         * assertion - confirm that is acceptable in non-assert builds. */
        sreq->dev.tmpbuf = MPIU_Malloc((size_t) (sreq->dev.segment_size - sreq->dev.segment_first));
        MPIU_Assert(sreq->dev.tmpbuf);
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.tmpbuf);
        MPIU_Assert(last == sreq->dev.segment_size);

        /* The packed data goes into the next free entry */
        req_area->iov_buf[req_area->iov_count].ptr = sreq->dev.tmpbuf;
        req_area->iov_buf[req_area->iov_count].length = last - sreq->dev.segment_first;
        req_area->iov_count += 1;
    }

    vc_area->pending_sends++;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = TRUE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
示例#15
0
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
                    int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;
    MPID_Request *sreq = NULL;
    ptl_me_t me;
    int initial_iov_count, remaining_iov_count;
    ptl_md_t md;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);

    MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
    sreq->dev.match.parts.rank = dest;
    sreq->dev.match.parts.tag = tag;
    sreq->dev.match.parts.context_id = comm->context_id + context_offset;
    sreq->ch.vc = vc;

    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz));

    if (data_sz <= PTL_LARGE_THRESHOLD) {
        /* Small message.  Send all data eagerly */
        if (dt_contig) {
            void *start = (char *)buf + dt_true_lb;
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message");
            REQ_PTL(sreq)->event_handler = handler_send;
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler));
            if (start == NULL)
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            else
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler);
            goto fn_exit;
        }

        /* noncontig data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message");
        sreq->dev.segment_ptr = MPID_Segment_alloc();
        MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        last = sreq->dev.segment_size;
        sreq->dev.iov_count = MPL_IOV_LIMIT;
        MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

        if (last == sreq->dev.segment_size) {
            /* IOV is able to describe entire message */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    entire message fits in IOV");
            md.start = sreq->dev.iov;
            md.length = sreq->dev.iov_count;
            md.options = PTL_IOVEC;
            md.eq_handle = MPIDI_nem_ptl_origin_eq;
            md.ct_handle = PTL_CT_NONE;
            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

            REQ_PTL(sreq)->event_handler = handler_send;
            ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                        NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            goto fn_exit;
        }

        /* IOV is not long enough to describe entire message */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer");
        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        last = data_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]);
        MPIU_Assert(last == sreq->dev.segment_size);
        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ,
                                    vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
        goto fn_exit;
    }

    /* Large message.  Send first chunk of data and let receiver get the rest */
    if (dt_contig) {
        /* create ME for buffer so receiver can issue a GET for the data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message");
        big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                     NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);

        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        goto fn_exit;
    }

    /* Large noncontig data */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message");
    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    last = PTL_LARGE_THRESHOLD;
    sreq->dev.iov_count = MPL_IOV_LIMIT;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

    initial_iov_count = sreq->dev.iov_count;
    sreq->dev.segment_first = last;

    if (last == PTL_LARGE_THRESHOLD) {
        /* first chunk of message fits into IOV */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    first chunk fits in IOV");
        if (initial_iov_count < MPL_IOV_LIMIT) {
            /* There may be space for the rest of the message in this IOV */
            sreq->dev.iov_count = MPL_IOV_LIMIT - sreq->dev.iov_count;
            last = sreq->dev.segment_size;

            MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                     &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count);
            remaining_iov_count = sreq->dev.iov_count;

            if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
                /* Entire message fit in one IOV */
                int was_incomplete;

                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    rest of message fits in one IOV");
                /* Create ME for remaining data */
                me.start = &sreq->dev.iov[initial_iov_count];
                me.length = remaining_iov_count;
                me.ct_handle = PTL_CT_NONE;
                me.uid = PTL_UID_ANY;
                me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                               PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC );
                me.match_id = vc_ptl->id;
                me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
                me.ignore_bits = 0;
                me.min_free = 0;

                MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");

                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                  &REQ_PTL(sreq)->get_me_p[0]);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
                /* increment the cc for the get operation */
                MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
                MPIU_Assert(was_incomplete);

                /* Create MD for first chunk */
                md.start = sreq->dev.iov;
                md.length = initial_iov_count;
                md.options = PTL_IOVEC;
                md.eq_handle = MPIDI_nem_ptl_origin_eq;
                md.ct_handle = PTL_CT_NONE;
                ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

                REQ_PTL(sreq)->event_handler = handler_send;
                ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                goto fn_exit;
            }
示例#16
0
文件: ch3u_buffer.c 项目: tjhei/fgmpi
/*
 * MPIDI_CH3U_Buffer_copy - copy the data described by (sbuf, scount, sdt)
 * into the buffer described by (rbuf, rcount, rdt).
 *
 * Parameters:
 *   sbuf, scount, sdt - source buffer, element count and datatype
 *   smpi_errno        - [out] send-side error code; MPI_SUCCESS unless the
 *                       staging buffer cannot be allocated
 *   rbuf, rcount, rdt - destination buffer, element count and datatype
 *   rsz               - [out] number of bytes actually delivered to rbuf
 *   rmpi_errno        - [out] receive-side error code (truncation, datatype
 *                       mismatch, or allocation failure)
 *
 * Behaviour:
 *   - If the source describes more bytes than the destination, the copy is
 *     clamped to the destination size and *rmpi_errno is set to an
 *     MPI_ERR_TRUNCATE error.
 *   - If there is nothing to copy, *rsz is set to 0 and the function returns.
 *   - Both sides contiguous: a single memcpy.
 *   - Only the source contiguous: the source bytes are unpacked directly into
 *     the destination layout.
 *   - Only the destination contiguous: the source layout is packed directly
 *     into the destination bytes.
 *   - Neither contiguous: data is moved through a temporary buffer of
 *     MPIDI_COPY_BUFFER_SZ bytes using repeated pack/unpack steps.
 */
void MPIDI_CH3U_Buffer_copy(
    const void * const sbuf, MPI_Aint scount, MPI_Datatype sdt, int * smpi_errno,
    void * const rbuf, MPI_Aint rcount, MPI_Datatype rdt, MPIDI_msg_sz_t * rsz,
    int * rmpi_errno)
{
    int sdt_contig;
    int rdt_contig;
    MPI_Aint sdt_true_lb, rdt_true_lb;
    MPIDI_msg_sz_t sdata_sz;
    MPIDI_msg_sz_t rdata_sz;
    MPID_Datatype * sdt_ptr;
    MPID_Datatype * rdt_ptr;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
    *smpi_errno = MPI_SUCCESS;
    *rmpi_errno = MPI_SUCCESS;

    MPIDI_Datatype_get_info(scount, sdt, sdt_contig, sdata_sz, sdt_ptr, sdt_true_lb);
    MPIDI_Datatype_get_info(rcount, rdt, rdt_contig, rdata_sz, rdt_ptr, rdt_true_lb);

    /* --BEGIN ERROR HANDLING-- */
    if (sdata_sz > rdata_sz)
    {
        MPIU_DBG_MSG_FMT(CH3_OTHER,TYPICAL,(MPIU_DBG_FDEST,
            "message truncated, sdata_sz=" MPIDI_MSG_SZ_FMT " rdata_sz=" MPIDI_MSG_SZ_FMT,
                          sdata_sz, rdata_sz));
        /* Build the error code while sdata_sz still holds the sender's size;
         * clamping first would make the message report rdata_sz twice.
         * NOTE(review): the "%d" conversions receive MPIDI_msg_sz_t values --
         * confirm the widths agree on all supported platforms. */
        *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz );
        /* Only as many bytes as the receiver can hold are copied. */
        sdata_sz = rdata_sz;
    }
    /* --END ERROR HANDLING-- */

    if (sdata_sz == 0)
    {
        /* Nothing to move. */
        *rsz = 0;
        goto fn_exit;
    }

    if (sdt_contig && rdt_contig)
    {
        /* Both layouts are contiguous: one flat copy between the true
         * starting addresses. */
        MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
        MPIU_Memcpy((char *)rbuf + rdt_true_lb, (const char *)sbuf + sdt_true_lb, sdata_sz);
        MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
        *rsz = sdata_sz;
    }
    else if (sdt_contig)
    {
        /* Contiguous source: treat the source bytes as an already packed
         * stream and unpack them into the receive layout. */
        MPID_Segment seg;
        MPI_Aint last;

        MPID_Segment_init(rbuf, rcount, rdt, &seg, 0);
        last = sdata_sz;
        MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                          "pre-unpack last=" MPIDI_MSG_SZ_FMT, last ));
        MPID_Segment_unpack(&seg, 0, &last, (char*)sbuf + sdt_true_lb);
        MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                          "post-unpack last=" MPIDI_MSG_SZ_FMT, last ));
        /* --BEGIN ERROR HANDLING-- */
        if (last != sdata_sz)
        {
            /* The receive datatype could not absorb every byte. */
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

        *rsz = last;
    }
    else if (rdt_contig)
    {
        /* Contiguous destination: pack the send layout straight into the
         * receive buffer. */
        MPID_Segment seg;
        MPI_Aint last;

        MPID_Segment_init(sbuf, scount, sdt, &seg, 0);
        last = sdata_sz;
        MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                          "pre-pack last=" MPIDI_MSG_SZ_FMT, last ));
        MPID_Segment_pack(&seg, 0, &last, (char*)rbuf + rdt_true_lb);
        MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                          "post-pack last=" MPIDI_MSG_SZ_FMT, last ));
        /* --BEGIN ERROR HANDLING-- */
        if (last != sdata_sz)
        {
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

        *rsz = last;
    }
    else
    {
        /* Neither side is contiguous: stage the data through a temporary
         * buffer, packing from the source and unpacking into the
         * destination one chunk at a time. */
        char * buf;
        MPIDI_msg_sz_t buf_off;   /* bytes carried over at the front of buf */
        MPID_Segment sseg;
        MPIDI_msg_sz_t sfirst;    /* stream offset of the next byte to pack */
        MPID_Segment rseg;
        MPIDI_msg_sz_t rfirst;    /* stream offset of the next byte to unpack */

        buf = MPIU_Malloc(MPIDI_COPY_BUFFER_SZ);
        /* --BEGIN ERROR HANDLING-- */
        if (buf == NULL)
        {
            MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"SRBuf allocation failure");
            *smpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            *rmpi_errno = *smpi_errno;
            *rsz = 0;
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        MPID_Segment_init(sbuf, scount, sdt, &sseg, 0);
        MPID_Segment_init(rbuf, rcount, rdt, &rseg, 0);

        sfirst = 0;
        rfirst = 0;
        buf_off = 0;

        for(;;)
        {
            MPI_Aint last;
            char * buf_end;

            /* Pack as much as fits behind the carried-over bytes, or the
             * remainder of the message if that is smaller. */
            if (sdata_sz - sfirst > MPIDI_COPY_BUFFER_SZ - buf_off)
            {
                last = sfirst + (MPIDI_COPY_BUFFER_SZ - buf_off);
            }
            else
            {
                last = sdata_sz;
            }

            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
               "pre-pack first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
                                                sfirst, last ));
            MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
               "post-pack first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
               sfirst, last ));
            /* --BEGIN ERROR HANDLING-- */
            MPIU_Assert(last > sfirst);
            /* --END ERROR HANDLING-- */

            /* buf_end marks one past the last valid byte in the staging
             * buffer after this pack step. */
            buf_end = buf + buf_off + (last - sfirst);
            sfirst = last;

            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
             "pre-unpack first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
                                                rfirst, last ));
            MPID_Segment_unpack(&rseg, rfirst, &last, buf);
            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
             "post-unpack first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
                                                rfirst, last ));
            /* --BEGIN ERROR HANDLING-- */
            MPIU_Assert(last > rfirst);
            /* --END ERROR HANDLING-- */

            rfirst = last;

            if (rfirst == sdata_sz)
            {
                /* successful completion */
                break;
            }

            /* --BEGIN ERROR HANDLING-- */
            if (sfirst == sdata_sz)
            {
                /* datatype mismatch -- remaining bytes could not be unpacked */
                *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
                break;
            }
            /* --END ERROR HANDLING-- */

            /* Bytes that were packed but not yet unpacked are slid to the
             * front of the staging buffer for the next iteration. */
            buf_off = sfirst - rfirst;
            if (buf_off > 0)
            {
                MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE, (MPIU_DBG_FDEST,
                  "moved " MPIDI_MSG_SZ_FMT " bytes to the beginning of the tmp buffer", buf_off));
                memmove(buf, buf_end - buf_off, buf_off);
            }
        }

        *rsz = rfirst;
        MPIU_Free(buf);
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
}