Example #1
int MPIDI_CH3_RecvRndv( MPIDI_VC_t * vc, MPID_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;

    /* A rendezvous request-to-send (RTS) message has arrived.  We need
       to send a CTS message to the remote process. */
    MPID_Request * cts_req;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
		 "rndv RTS in the request, sending rndv CTS");
    
    MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
    cts_pkt->sender_req_id = rreq->dev.sender_req_id;
    cts_pkt->receiver_req_id = rreq->handle;
    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, cts_pkt, 
					      sizeof(*cts_pkt), &cts_req));
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|ctspkt");
    }
    if (cts_req != NULL)
    {
	/* FIXME: Ideally we could specify that a req not be returned.  
	   This would avoid our having to decrement the
	   reference count on a req we don't want/need. */
	MPID_Request_release(cts_req);
    }

 fn_fail:    
    return mpi_errno;
}
Example #2
static inline void
MPIDI_RecvShortCB(pami_context_t    context,
                  const void      * _msginfo,
                  const void      * sndbuf,
                  size_t            sndlen,
                  pami_endpoint_t   sender,
                  unsigned          isSync)
{
  MPID_assert(_msginfo != NULL);

  const MPIDI_MsgInfo *msginfo = (const MPIDI_MsgInfo *)_msginfo;
  MPID_Request * rreq = NULL;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  int          rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank       = msginfo->MPIrank;
  unsigned tag        = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
  rreq = MPIDI_Recvq_FDP(rank, source, tag, context_id, msginfo->MPIseqno);
#endif

  /* Match not found */
  if (unlikely(rreq == NULL))
    {
#if (MPIDI_STATISTICS)
         MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
      MPID_Request *newreq = MPIDI_Request_create2();
      MPID_assert(newreq != NULL);
      if (sndlen)
      {
        newreq->mpid.uebuflen = sndlen;
        if (!TOKEN_FLOW_CONTROL_ON)
          {
            newreq->mpid.uebuf = MPL_malloc(sndlen);
            newreq->mpid.uebuf_malloc = mpiuMalloc;
          }
        else
          {
            #if TOKEN_FLOW_CONTROL
            MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
            newreq->mpid.uebuf = MPIDI_mm_alloc(sndlen);
            newreq->mpid.uebuf_malloc = mpidiBufMM;
            MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
            #else
            MPID_assert_always(0);
            #endif
          }
        MPID_assert(newreq->mpid.uebuf != NULL);
      }
      MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
#ifndef OUT_OF_ORDER_HANDLING
      rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
      rreq = MPIDI_Recvq_FDP(rank, PAMIX_Endpoint_query(sender), tag, context_id, msginfo->MPIseqno);
#endif
      
      if (unlikely(rreq == NULL))
      {
        MPIDI_Callback_process_unexp(newreq, context, msginfo, sndlen, sender, sndbuf, NULL, isSync);
        /* request is always complete now */
        if (TOKEN_FLOW_CONTROL_ON && sndlen)
          {
            #if TOKEN_FLOW_CONTROL
            MPIDI_Token_cntr[source].unmatched++;
            #else
            MPID_assert_always(0);
            #endif
          }
        MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
        MPID_Request_release(newreq);
        goto fn_exit_short;
      }
      else
      {       
        MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
        MPID_Request_discard(newreq);
      }         
    }
  else
    {
#if (MPIDI_STATISTICS)
     MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      if (TOKEN_FLOW_CONTROL_ON && sndlen)
        {
          #if TOKEN_FLOW_CONTROL
          MPIDI_Update_rettoks(source);
          MPIDI_Must_return_tokens(context,source);
          #else
          MPID_assert_always(0);
          #endif
        }
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }

  /* The receive queue processing has been completed and we found a match. */

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG    = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
  MPIDI_Request_setCA          (rreq, MPIDI_CA_COMPLETE);
  MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
  MPIDI_Request_setSync        (rreq, isSync);
  MPIDI_Request_setRzv         (rreq, 0);

  /* ----------------------------- */
  /*  Request was already posted.  */
  /* ----------------------------- */
  if (unlikely(isSync))
    MPIDI_SyncAck_post(context, rreq, PAMIX_Endpoint_query(sender));

  if (unlikely(HANDLE_GET_KIND(rreq->mpid.datatype) != HANDLE_KIND_BUILTIN))
    {
      MPIDI_Callback_process_userdefined_dt(context, sndbuf, sndlen, rreq);
      goto fn_exit_short;
    }

  size_t dt_size = rreq->mpid.userbufcount * MPID_Datatype_get_basic_size(rreq->mpid.datatype);

  /* ----------------------------- */
  /*  Test for truncated message.  */
  /* ----------------------------- */
  if (unlikely(sndlen > dt_size))
    {
#if ASSERT_LEVEL > 0
      MPIDI_Callback_process_trunc(context, rreq, NULL, sndbuf);
      goto fn_exit_short;
#else
      sndlen = dt_size;
#endif
    }

  MPID_assert(rreq->mpid.uebuf    == NULL);
  MPID_assert(rreq->mpid.uebuflen == 0);
  void* rcvbuf = rreq->mpid.userbuf;

  if (sndlen > 0)
  {
#if CUDA_AWARE_SUPPORT
    if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
    {
      cudaError_t cudaerr = CudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
    }
    else
#endif
      memcpy(rcvbuf, sndbuf, sndlen);
  }
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,sndlen);
  TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.comp_in_HH);
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
  MPIDI_Request_complete(rreq);

 fn_exit_short:
#ifdef OUT_OF_ORDER_HANDLING
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  if (MPIDI_In_cntr[source].n_OutOfOrderMsgs>0)  {
    MPIDI_Recvq_process_out_of_order_msgs(source, context);
  }
  MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#endif

  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}
Example #3
/*@
   MPI_Init_thread - Initialize the MPI execution environment

   Input Parameters:
+  argc - Pointer to the number of arguments 
.  argv - Pointer to the argument vector
-  required - Level of desired thread support

   Output Parameter:
.  provided - Level of provided thread support

   Command line arguments:
   MPI specifies no command-line arguments but does allow an MPI 
   implementation to make use of them.  See 'MPI_INIT' for a description of 
   the command line arguments supported by 'MPI_INIT' and 'MPI_INIT_THREAD'.

   Notes:
   The valid values for the level of thread support are\:
+ MPI_THREAD_SINGLE - Only one thread will execute. 
. MPI_THREAD_FUNNELED - The process may be multi-threaded, but only the main 
  thread will make MPI calls (all MPI calls are funneled to the 
   main thread). 
. MPI_THREAD_SERIALIZED - The process may be multi-threaded, and multiple 
  threads may make MPI calls, but only one at a time: MPI calls are not 
  made concurrently from two distinct threads (all MPI calls are serialized). 
- MPI_THREAD_MULTIPLE - Multiple threads may call MPI, with no restrictions. 

Notes for Fortran:
   Note that the Fortran binding for this routine does not have the 'argc' and
   'argv' arguments. ('MPI_INIT_THREAD(required, provided, ierror)')


.N Errors
.N MPI_SUCCESS
.N MPI_ERR_OTHER

.seealso: MPI_Init, MPI_Finalize
@*/
int MPI_Init_thread( int *argc, char ***argv, int required, int *provided )
{
    int mpi_errno = MPI_SUCCESS;
    int rc, reqd = required;
    MPID_MPI_INIT_STATE_DECL(MPID_STATE_MPI_INIT_THREAD);

    rc = MPID_Wtime_init();
#ifdef USE_DBG_LOGGING
    MPIU_DBG_PreInit( argc, argv, rc );
#endif

    MPID_MPI_INIT_FUNC_ENTER(MPID_STATE_MPI_INIT_THREAD);

#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    MV2_Read_env_vars();
#endif /* defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */

#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            if (MPIR_Process.initialized != MPICH_PRE_INIT) {
                mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPI_Init_thread", __LINE__, MPI_ERR_OTHER,
						  "**inittwice", 0 );
	    }
            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ... */

    /* If the user requested asynchronous progress, request
     * THREAD_MULTIPLE. */
    rc = 0;
    MPL_env2bool("MPICH_ASYNC_PROGRESS", &rc);
    if (rc)
        reqd = MPI_THREAD_MULTIPLE;

    mpi_errno = MPIR_Init_thread( argc, argv, reqd, provided );
    if (mpi_errno != MPI_SUCCESS) goto fn_fail;

    if (rc && *provided == MPI_THREAD_MULTIPLE) {
        mpi_errno = MPIR_Init_async_thread();
        if (mpi_errno) goto fn_fail;

        MPIR_async_thread_initialized = 1;
    }

    /* ... end of body of routine ... */

    MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT_THREAD);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_REPORTING
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, 
	    "**mpi_init_thread",
	    "**mpi_init_thread %p %p %d %p", argc, argv, required, provided);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm( 0, FCNAME, mpi_errno );
    MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT_THREAD);

    MPIU_THREAD_CS_EXIT(INIT,*provided);

    return mpi_errno;
    /* --END ERROR HANDLING-- */
}
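
To illustrate the calling convention documented above, here is a minimal usage sketch (a hypothetical standalone caller, not part of the MPICH sources shown in these examples): it requests MPI_THREAD_MULTIPLE and then checks the level the library actually granted, since 'provided' may be lower than 'required'.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int provided = MPI_THREAD_SINGLE;

    /* Ask for full multi-threading; the library reports what it can do. */
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    if (provided < MPI_THREAD_MULTIPLE)
        fprintf(stderr, "only thread level %d is available\n", provided);

    /* ... application code ... */

    MPI_Finalize();
    return 0;
}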
Example #4
int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
{
    int mpi_errno = MPI_SUCCESS;
    int has_args;
    int has_env;
    int thread_provided;
    int exit_init_cs_on_failure = 0;

    /* For any code in the device that wants to check for runtime 
       decisions on the value of isThreaded, set a provisional
       value here. We could let the MPID_Init routine override this */
#ifdef HAVE_RUNTIME_THREADCHECK
    MPIR_ThreadInfo.isThreaded = required == MPI_THREAD_MULTIPLE;
#endif

    MPIU_THREAD_CS_INIT;

    /* FIXME: Move to os-dependent interface? */
#ifdef HAVE_WINDOWS_H
    /* prevent the process from bringing up an error message window if mpich 
       asserts */
    _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE );
    _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR );
    _CrtSetReportHook2(_CRT_RPTHOOK_INSTALL, assert_hook);
#ifdef _WIN64
    {
    /* FIXME: (Windows) This severely degrades performance but fixes alignment 
       issues with the datatype code. */
    /* Prevent misaligned faults on Win64 machines */
    UINT mode, old_mode;
    
    old_mode = SetErrorMode(SEM_NOALIGNMENTFAULTEXCEPT);
    mode = old_mode | SEM_NOALIGNMENTFAULTEXCEPT;
    SetErrorMode(mode);
    }
#endif
#endif

    /* We need this in order to implement IS_THREAD_MAIN */
#   if (MPICH_THREAD_LEVEL >= MPI_THREAD_SERIALIZED) && defined(MPICH_IS_THREADED)
    {
	MPID_Thread_self(&MPIR_ThreadInfo.master_thread);
    }
#   endif

#ifdef HAVE_ERROR_CHECKING
    /* Because the PARAM system has not been initialized, temporarily
       unconditionally enable error checks.  Once the PARAM system is
       initialized, this may be reset */
    MPIR_Process.do_error_checks = 1;
#else
    MPIR_Process.do_error_checks = 0;
#endif

    /* Initialize necessary subsystems and setup the predefined attribute
       values.  Subsystems may change these values. */
    MPIR_Process.attrs.appnum          = -1;
    MPIR_Process.attrs.host            = 0;
    MPIR_Process.attrs.io              = 0;
    MPIR_Process.attrs.lastusedcode    = MPI_ERR_LASTCODE;
    MPIR_Process.attrs.tag_ub          = 0;
    MPIR_Process.attrs.universe        = MPIR_UNIVERSE_SIZE_NOT_SET;
    MPIR_Process.attrs.wtime_is_global = 0;

    /* Set the functions used to duplicate attributes.  These are set
       when the first corresponding keyval is created. */
    MPIR_Process.attr_dup  = 0;
    MPIR_Process.attr_free = 0;

#ifdef HAVE_CXX_BINDING
    /* Set the functions used to call functions in the C++ binding 
       for reductions and attribute operations.  These are null
       until a C++ operation is defined.  This allows the C code
       that implements these operations to avoid invoking C++ code
       directly, which may force the inclusion of symbols known only
       to the C++ compiler (e.g., under some non-GNU compilers, including
       Solaris and IRIX). */
    MPIR_Process.cxx_call_op_fn = 0;

#endif
    /* This allows the device to select an alternative function for 
       dimsCreate */
    MPIR_Process.dimsCreate     = 0;

    /* "Allocate" from the reserved space for builtin communicators and
       (partially) initialize predefined communicators.  comm_parent is
       initially NULL and will be allocated by the device if the process group
       was started using one of the MPI_Comm_spawn functions. */
    MPIR_Process.comm_world		    = MPID_Comm_builtin + 0;
    MPIR_Comm_init(MPIR_Process.comm_world);
    MPIR_Process.comm_world->handle	    = MPI_COMM_WORLD;
    MPIR_Process.comm_world->context_id	    = 0 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_world->recvcontext_id = 0 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_world->comm_kind	    = MPID_INTRACOMM;
    /* This initialization of the comm name could be done only when 
       comm_get_name is called */
    MPIU_Strncpy(MPIR_Process.comm_world->name, "MPI_COMM_WORLD",
		 MPI_MAX_OBJECT_NAME);

    MPIR_Process.comm_self		    = MPID_Comm_builtin + 1;
    MPIR_Comm_init(MPIR_Process.comm_self);
    MPIR_Process.comm_self->handle	    = MPI_COMM_SELF;
    MPIR_Process.comm_self->context_id	    = 1 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_self->recvcontext_id  = 1 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.comm_self->comm_kind	    = MPID_INTRACOMM;
    MPIU_Strncpy(MPIR_Process.comm_self->name, "MPI_COMM_SELF",
		 MPI_MAX_OBJECT_NAME);

#ifdef MPID_NEEDS_ICOMM_WORLD
    MPIR_Process.icomm_world		    = MPID_Comm_builtin + 2;
    MPIR_Comm_init(MPIR_Process.icomm_world);
    MPIR_Process.icomm_world->handle	    = MPIR_ICOMM_WORLD;
    MPIR_Process.icomm_world->context_id    = 2 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.icomm_world->recvcontext_id= 2 << MPID_CONTEXT_PREFIX_SHIFT;
    MPIR_Process.icomm_world->comm_kind	    = MPID_INTRACOMM;
    MPIU_Strncpy(MPIR_Process.icomm_world->name, "MPI_ICOMM_WORLD",
		 MPI_MAX_OBJECT_NAME);

    /* Note that these communicators are not ready for use - MPID_Init 
       will setup self and world, and icomm_world if it desires it. */
#endif

    MPIR_Process.comm_parent = NULL;

    /* Setup the initial communicator list in case we have 
       enabled the debugger message-queue interface */
    MPIR_COMML_REMEMBER( MPIR_Process.comm_world );
    MPIR_COMML_REMEMBER( MPIR_Process.comm_self );

    /* Call any and all MPID_Init type functions */
    MPIR_Err_init();
    MPIR_Datatype_init();
    MPIR_Group_init();

    /* MPIU_Timer_pre_init(); */

    mpi_errno = MPIR_Param_init_params();
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Wait for debugger to attach if requested. */
    if (MPIR_PARAM_DEBUG_HOLD) {
        volatile int hold = 1;
        while (hold)
#ifdef HAVE_USLEEP
            usleep(100);
#endif
            ;
    }


#if HAVE_ERROR_CHECKING == MPID_ERROR_LEVEL_RUNTIME
    MPIR_Process.do_error_checks = MPIR_PARAM_ERROR_CHECKING;
#endif

    /* define MPI as initialized so that we can use MPI functions within 
       MPID_Init if necessary */
    MPIR_Process.initialized = MPICH_WITHIN_MPI;

    /* We can't acquire any critical sections until this point.  Any
     * earlier, the basic data structures would not yet be initialized. */
    MPIU_THREAD_CS_ENTER(INIT,required);
    exit_init_cs_on_failure = 1;

    mpi_errno = MPID_Init(argc, argv, required, &thread_provided, 
			  &has_args, &has_env);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Capture the level of thread support provided */
    MPIR_ThreadInfo.thread_provided = thread_provided;
    if (provided) *provided = thread_provided;
#ifdef HAVE_RUNTIME_THREADCHECK
    MPIR_ThreadInfo.isThreaded = (thread_provided == MPI_THREAD_MULTIPLE);
#endif

    /* FIXME: Define these in the interface.  Does Timer init belong here? */
    MPIU_dbg_init(MPIR_Process.comm_world->rank);
    MPIU_Timer_init(MPIR_Process.comm_world->rank,
		    MPIR_Process.comm_world->local_size);
#ifdef USE_MEMORY_TRACING
    MPIU_trinit( MPIR_Process.comm_world->rank );
    /* Indicate that we are near the end of the init step; memory 
       allocated already will have an id of zero; this helps 
       separate memory leaks in the initialization code from 
       leaks in the "active" code */
    /* Uncomment this code to leave out any of the MPID_Init/etc 
       memory allocations from the memory leak testing */
    /* MPIU_trid( 1 ); */
#endif
#ifdef USE_DBG_LOGGING
    MPIU_DBG_Init( argc, argv, has_args, has_env, 
		   MPIR_Process.comm_world->rank );
#endif

    /* Initialize the C versions of the Fortran link-time constants.
       
       We now initialize the Fortran symbols from within the Fortran 
       interface in the routine that first needs the symbols.
       This fixes a problem with symbols added by a Fortran compiler that 
       are not part of the C runtime environment (the Portland group
       compilers would do this) 
    */
#if defined(HAVE_FORTRAN_BINDING) && defined(HAVE_MPI_F_INIT_WORKS_WITH_C)
    mpirinitf_();
#endif

    /* FIXME: Does this need to come before the call to MPID_InitComplete?
       For some debugger support, MPIR_WaitForDebugger may want to use
       MPI communication routines to collect information for the debugger */
#ifdef HAVE_DEBUGGER_SUPPORT
    MPIR_WaitForDebugger();
#endif

    /* Let the device know that the rest of the init process is completed */
    if (mpi_errno == MPI_SUCCESS) 
	mpi_errno = MPID_InitCompleted();

#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    if (is_shmem_collectives_enabled()){
        if (check_split_comm(pthread_self())){
            int my_id, size;
            PMPI_Comm_rank(MPI_COMM_WORLD, &my_id);
            PMPI_Comm_size(MPI_COMM_WORLD, &size);
            disable_split_comm(pthread_self());
            create_2level_comm(MPI_COMM_WORLD, size, my_id);
            enable_split_comm(pthread_self());
        }
    }
#endif /* defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */


fn_exit:
    MPIU_THREAD_CS_EXIT(INIT,required);
    return mpi_errno;

fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    /* signal to error handling routines that core services are unavailable */
    MPIR_Process.initialized = MPICH_PRE_INIT;

    if (exit_init_cs_on_failure) {
        MPIU_THREAD_CS_EXIT(INIT,required);
    }
    MPIU_THREAD_CS_FINALIZE;
    return mpi_errno;
    /* --END ERROR HANDLING-- */
}
Example #5
int 
MPID_nem_newmad_poll(int in_blocking_poll)
{
   nm_sr_request_t *p_request = NULL;
   nm_tag_t         match_info = 0;
   int mpi_errno = MPI_SUCCESS;   
   
   nm_sr_send_success(mpid_nem_newmad_session, &p_request);
   if (p_request != NULL)
   {
      MPID_nem_newmad_unified_req_t *ref;
      MPID_Request                  *req;
      MPID_Request_kind_t            kind;      
      MPIR_Context_id_t              ctxt;
      
      nm_sr_get_stag(mpid_nem_newmad_session,p_request, &match_info);
      NEM_NMAD_MATCH_GET_CTXT(match_info, ctxt);
            
      nm_sr_get_ref(mpid_nem_newmad_session,p_request,(void *)&ref);
      req = &(ref->mpi_req);
      MPIU_Assert(req != NULL);
      kind = req->kind;
	
      if(ctxt == NEM_NMAD_INTRA_CTXT)
      {
	 if ((kind == MPID_REQUEST_SEND) || (kind == MPID_PREQUEST_SEND))
	 {
	    MPID_nem_newmad_handle_sreq(req);	    
	 }
      }
      else
      {
	 if ((kind == MPID_REQUEST_SEND) || (kind == MPID_PREQUEST_SEND))
	 {
	    MPIU_Assert(MPIDI_Request_get_type(req) != MPIDI_REQUEST_TYPE_GET_RESP);                  
	    MPID_nem_newmad_handle_sreq(req);	    
	 }
      }   
   }
   
   nm_sr_recv_success(mpid_nem_newmad_session, &p_request);
   if (p_request != NULL)
   {
      MPID_nem_newmad_unified_req_t *ref;
      MPID_Request                  *req;
      MPID_Request_kind_t            kind;      
      MPIR_Context_id_t              ctxt;
      size_t                         size;
            
      nm_sr_get_ref(mpid_nem_newmad_session,p_request,(void *)&ref);
      req = &(ref->mpi_req);
      MPIU_Assert(req != NULL);
      kind = req->kind;
      nm_sr_get_size(mpid_nem_newmad_session, p_request, &size);
      nm_sr_get_rtag(mpid_nem_newmad_session,p_request, &match_info);
      NEM_NMAD_MATCH_GET_CTXT(match_info, ctxt);
	
      if(ctxt == NEM_NMAD_INTRA_CTXT)
	  {
	 MPID_nem_newmad_internal_req_t *adi_req = &(ref->nem_newmad_req);
	 if (kind == MPID_REQUEST_RECV)
	 {
	    if (size <= sizeof(MPIDI_CH3_PktGeneric_t))
	    {
	       MPID_nem_handle_pkt(adi_req->vc,(char *)&(adi_req->pending_pkt),(MPIDI_msg_sz_t)(size));
	    }
	    else
	    {
	       MPID_nem_handle_pkt(adi_req->vc,(char *)(adi_req->tmpbuf),(MPIDI_msg_sz_t)(adi_req->tmpbuf_sz));
	       MPIU_Free(adi_req->tmpbuf);
	    }
	    nm_core_disable_progression(mpid_nem_newmad_session->p_core);
	    MPID_nem_newmad_internal_req_enqueue(adi_req);
	    nm_core_enable_progression(mpid_nem_newmad_session->p_core);
	 }
	 else
	 {
	    MPIU_Assert(0);
	 }	 
      }
      else
      {
	 if ((kind == MPID_REQUEST_RECV) || (kind == MPID_PREQUEST_RECV))
	 {
	    int found = FALSE;
	    nm_sr_request_t *nmad_request = NULL;	       
	    MPIU_Assert(MPIDI_Request_get_type(req) != MPIDI_REQUEST_TYPE_GET_RESP);
	    MPIU_THREAD_CS_ENTER(MSGQUEUE,req);
	    MPID_NEM_NMAD_GET_REQ_FROM_HASH(req,nmad_request);
	    if(nmad_request != NULL)
	    {
	       MPIU_Assert(req->dev.match.parts.rank == MPI_ANY_SOURCE);
	       MPIU_Free(nmad_request);
	    }
	    found = MPIDI_CH3U_Recvq_DP(req);
	    if(found){
		MPID_nem_newmad_handle_rreq(req,match_info,size);
	    }
	    MPIU_THREAD_CS_EXIT(MSGQUEUE,req);
	 }
	 else
	 {
	    fprintf(stdout, ">>>>>>>>>>>>> ERROR: Wrong req type : %i (%p)\n",(int)kind,req);
	    MPIU_Assert(0);
	 }
      }   
   }

 fn_exit:
   return mpi_errno;
 fn_fail:  ATTRIBUTE((unused))
   goto fn_exit;   
}
Example #6
int MPID_Isend(const void * buf, int count, MPI_Datatype datatype, int rank, 
	       int tag, MPID_Comm * comm, int context_offset,
               MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc=0;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                  "rank=%d, tag=%d, context=%d", 
                  rank, tag, comm->context_id + context_offset));
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
#if defined (_OSU_PSM_)
        goto skip_self_send; /* psm will internally do self-send, no special
                                handling is needed here */
#endif /* _OSU_PSM_ */          
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
			    context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq);
	goto fn_exit;
    }
#if defined (_OSU_PSM_)
skip_self_send:
#endif

    if (rank != MPI_PROC_NULL) {
        MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#ifdef ENABLE_COMM_OVERRIDES
        /* this needs to come before the sreq is created, since the override
         * function is responsible for creating its own request */
        if (vc->comm_ops && vc->comm_ops->isend)
        {
            mpi_errno = vc->comm_ops->isend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
            goto fn_exit;
        }
#endif
    }

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);

    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);
    
    if (data_sz == 0)
    {
#if defined (_OSU_PSM_)
        goto eager_send;
#endif /* _OSU_PSM_ */
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
	sreq->dev.OnDataAvail = 0;
	    
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = sreq->handle;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	MPIDI_Request_set_seqnum(sreq, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIU_CALL(MPIDI_CH3,iSend(vc, sreq, eager_pkt, 
					      sizeof(*eager_pkt)));
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_Object_set_ref(sreq, 0);
	    MPIDI_CH3_Request_destroy(sreq);
	    sreq = NULL;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */

	goto fn_exit;
    }

#if defined (_OSU_PSM_)
    if(HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        sreq->dev.datatype_ptr = dt_ptr;
        MPID_Datatype_add_ref(dt_ptr);
        sreq->psm_flags |= PSM_NEED_DTYPE_RELEASE;
    }
    if(vc->force_eager)
        goto eager_send;
#endif /* _OSU_PSM_ */

#if defined(_OSU_MVAPICH_)
    int i;
    for (i = 0 ; i < rdma_num_extra_polls; i++)
    {
        if (rdma_global_ext_sendq_size > 1)
            MPID_Progress_test();
    }
#endif
    /* FIXME: flow control: limit the number of outstanding eager messages
       that contain data and need to be buffered by the receiver */
#if defined(_OSU_MVAPICH_)
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <=	vc->eager_max_msg_sz
        && !vc->force_rndv)
#else /* defined(_OSU_MVAPICH_) */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <=	vc->eager_max_msg_sz)
#endif /* defined(_OSU_MVAPICH_) */
    {
#if defined (_OSU_PSM_)
eager_send:
#endif /* _OSU_PSM */
        if (dt_contig) 
        {
            mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, 
                                MPIDI_CH3_PKT_EAGER_SEND,
                                (char*)buf + dt_true_lb, 
                                data_sz, rank, tag, 
                                comm, context_offset );
        } 
        else 
        {
#if defined (_OSU_PSM_)
            sreq->psm_flags |= PSM_NON_BLOCKING_SEND;
#endif
            mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                          MPIDI_CH3_PKT_EAGER_SEND,
                                                          buf, count, datatype,
                                                          data_sz, rank, tag, 
                                                          comm, context_offset );
#if defined (_OSU_PSM_)
            goto fn_exit;
#endif            
            /* If we're not complete, then add a reference to the datatype */
            if (sreq && sreq->dev.OnDataAvail) {
                sreq->dev.datatype_ptr = dt_ptr;
                MPID_Datatype_add_ref(dt_ptr);
            }
        }
    }
    else
    {
	/* Note that the sreq was created above */
	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm, 
                                     context_offset );
	/* FIXME: fill temporary IOV or pack temporary buffer after send to 
	   hide some latency.  This requires synchronization
           because the CTS packet could arrive and be processed before the 
	   above iStartmsg completes (depending on the progress
           engine, threads, etc.). */
#if defined(_OSU_MVAPICH_)
        /* rndv transfers need to process CTS packet to initiate the actual RDMA transfer */
        MPID_Progress_test();
#endif /* defined(_OSU_MVAPICH_) */
	
	if (sreq && dt_ptr != NULL)
	{
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }

  fn_exit:
    *request = sreq;

#if defined(_OSU_MVAPICH_)
    for (i = 0 ; i < rdma_num_extra_polls; i++)
    {
        if (rdma_global_ext_sendq_size > 1)
            MPID_Progress_test();
    }
#endif

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,
    {
	if (sreq != NULL)
	{
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  );

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_ISEND);
    return mpi_errno;
}
Example #7
/*@
   MPI_Win_get_errhandler - Get the error handler for the MPI RMA window

Input Parameters:
. win - window (handle) 

Output Parameters:
. errhandler - error handler currently associated with window (handle) 

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_WIN
.N MPI_ERR_OTHER
@*/
int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler)
{
#ifdef HAVE_ERROR_CHECKING
    static const char FCNAME[] = "MPI_Win_get_errhandler";
#endif
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr = NULL;
    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_GET_ERRHANDLER);

    MPIR_ERRTEST_INITIALIZED_ORDIE();
    
    MPIU_THREAD_CS_ENTER(ALLFUNC,);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_GET_ERRHANDLER);

    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_WIN(win, mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif
    
    /* Convert MPI object handles to object pointers */
    MPID_Win_get_ptr( win, win_ptr );
    
    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_ARGNULL(errhandler,"errhandler",mpi_errno);
            /* Validate win_ptr */
            MPID_Win_valid_ptr( win_ptr, mpi_errno );
	    /* If win_ptr is not valid, it will be reset to null */
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    MPIU_THREAD_CS_ENTER(MPI_OBJ, win_ptr);

    if (win_ptr->errhandler) {
	*errhandler = win_ptr->errhandler->handle;
	MPIR_Errhandler_add_ref(win_ptr->errhandler);
    }
    else {
	/* Use the default */
	*errhandler = MPI_ERRORS_ARE_FATAL;
    }

    MPIU_THREAD_CS_EXIT(MPI_OBJ, win_ptr);

    /* ... end of body of routine ... */

#ifdef HAVE_ERROR_CHECKING
  fn_exit:
#endif
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
  fn_fail:
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, 
	    "**mpi_win_get_errhandler",
	    "**mpi_win_get_errhandler %W %p", win, errhandler);
    }
    mpi_errno = MPIR_Err_return_win(win_ptr, FCNAME, mpi_errno);
    goto fn_exit;
#   endif
    /* --END ERROR HANDLING-- */
}
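
As the body above shows, the routine returns MPI_ERRORS_ARE_FATAL when no handler has been attached to the window and otherwise adds a reference to the attached handler. A minimal usage sketch (hypothetical caller; assumes 'win' is a valid window created elsewhere, e.g. with MPI_Win_create):

MPI_Errhandler eh;

MPI_Win_get_errhandler(win, &eh);
if (eh == MPI_ERRORS_ARE_FATAL) {
    /* No handler has been attached; the default is in effect. */
}
else {
    /* A handler is attached; the query added a reference, so release
       our handle once we are done inspecting it. */
    MPI_Errhandler_free(&eh);
}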
Example #8
int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset,
		MPID_Request ** request)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_ready_send_t * const ready_pkt = &upkt.ready_send;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int mpi_errno = MPI_SUCCESS;    
    MPIDI_STATE_DECL(MPID_STATE_MPID_IRSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d", 
                rank, tag, comm->context_id + context_offset));
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_RSEND, &sreq);
	goto fn_exit;
    }
    
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND);
    MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
    
    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }
    
    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->irsend)
    {
	mpi_errno = vc->comm_ops->irsend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif
    
    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    MPIDI_Pkt_init(ready_pkt, MPIDI_CH3_PKT_READY_SEND);
    ready_pkt->match.parts.rank = comm->rank;
    ready_pkt->match.parts.tag = tag;
    ready_pkt->match.parts.context_id = comm->context_id + context_offset;
    ready_pkt->sender_req_id = MPI_REQUEST_NULL;
    ready_pkt->data_sz = data_sz;

    if (data_sz == 0)
    {
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");

	sreq->dev.OnDataAvail = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(ready_pkt, seqnum);
	MPIDI_Request_set_seqnum(sreq, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIDI_CH3_iSend(vc, sreq, ready_pkt, sizeof(*ready_pkt));
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_Object_set_ref(sreq, 0);
	    MPIDI_CH3_Request_destroy(sreq);
	    sreq = NULL;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */
	goto fn_exit;
    }
    
    if (vc->ready_eager_max_msg_sz < 0 || data_sz + sizeof(MPIDI_CH3_Pkt_ready_send_t) <= vc->ready_eager_max_msg_sz) {
        if (dt_contig) {
            mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq,
                                                    MPIDI_CH3_PKT_READY_SEND,
                                                    (char*)buf + dt_true_lb,
                                                    data_sz, rank, tag,
                                                    comm, context_offset );
            
        }
        else {
            mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq,
                                                      MPIDI_CH3_PKT_READY_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag,
                                                      comm, context_offset );
            /* If we're not complete, then add a reference to the datatype */
            if (sreq && sreq->dev.OnDataAvail) {
                sreq->dev.datatype_ptr = dt_ptr;
                MPID_Datatype_add_ref(dt_ptr);
            }
        }
    } else {
 	/* Do rendezvous.  This will be sent as a regular send not as
           a ready send, so the receiver won't know to send an error
           if the receive has not been posted */
	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm,
                                     context_offset );
	if (sreq && dt_ptr != NULL) {
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }

  fn_exit:
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{
	if (sreq != NULL)
	{
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  );

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_IRSEND);
    return mpi_errno;
}
Example #9
int MPID_Send(const void * buf, int count, MPI_Datatype datatype, int rank, 
	      int tag, MPID_Comm * comm, int context_offset,
	      MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;    
    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d", 
		rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BIT(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BIT(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
				     context_offset, MPIDI_REQUEST_TYPE_SEND, 
				     &sreq);

	/* In the single threaded case, sending to yourself will cause 
	   deadlock.  Note that in the runtime-thread case, this check
	   will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
	{
	    if (sreq != NULL && sreq->cc != 0) {
		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				    "**dev|selfsenddeadlock");
	    }
	}
#	endif
	if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
	goto fn_exit;
    }

    if (rank == MPI_PROC_NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->send)
    {
	mpi_errno = vc->comm_ops->send( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);


    if (data_sz == 0)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = MPI_REQUEST_NULL;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt), &sreq);
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
	}
	/* --END ERROR HANDLING-- */
	if (sreq != NULL)
	{
	    MPIDI_Request_set_seqnum(sreq, seqnum);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    /* sreq->comm = comm;
	      MPIR_Comm_add_ref(comm); -- not necessary for blocking functions */
	}
	
	goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* FIXME: flow control: limit the number of outstanding eager messages
       that contain data and need to be buffered by the receiver */
#ifdef USE_EAGER_SHORT
    if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) {
	mpi_errno = MPIDI_CH3_EagerContigShortSend( &sreq, 
					       MPIDI_CH3_PKT_EAGERSHORT_SEND,
					       (char *)buf + dt_true_lb,
					       data_sz, rank, tag, comm, 
					       context_offset );
    }
    else
#endif

    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold)
    {
	if (dt_contig)
        {
 	    mpi_errno = MPIDI_CH3_EagerContigSend( &sreq, 
						   MPIDI_CH3_PKT_EAGER_SEND,
						   (char *)buf + dt_true_lb,
						   data_sz, rank, tag, comm, 
						   context_offset );
	}
	else
        {
	    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                      MPIDI_CH3_PKT_EAGER_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag, 
                                                      comm, context_offset );
	}
    }
    else
    {
Example #10
int MPID_Isend(const void * buf, int count, MPI_Datatype datatype, int rank, 
	       int tag, MPID_Comm * comm, int context_offset,
               MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc=0;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                  "rank=%d, tag=%d, context=%d", 
                  rank, tag, comm->context_id + context_offset));
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
			    context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq);
	goto fn_exit;
    }

    if (rank != MPI_PROC_NULL) {
        MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#ifdef ENABLE_COMM_OVERRIDES
        /* this needs to come before the sreq is created, since the override
         * function is responsible for creating its own request */
        if (vc->comm_ops && vc->comm_ops->isend)
        {
            mpi_errno = vc->comm_ops->isend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
            goto fn_exit;
        }
#endif
    }

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);

    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);
    
    if (data_sz == 0)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
	sreq->dev.OnDataAvail = 0;
	    
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = sreq->handle;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	MPIDI_Request_set_seqnum(sreq, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIDI_CH3_iSend(vc, sreq, eager_pkt, sizeof(*eager_pkt));
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_Object_set_ref(sreq, 0);
	    MPIDI_CH3_Request_destroy(sreq);
	    sreq = NULL;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */

	goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* FIXME: flow control: limit the number of outstanding eager messages
       that contain data and need to be buffered by the receiver */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold)
    {
	if (dt_contig)
	{
	    mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, 
						    MPIDI_CH3_PKT_EAGER_SEND,
						    (char*)buf + dt_true_lb, 
						    data_sz, rank, tag, 
						    comm, context_offset );
	}
	else
	{
	    mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                      MPIDI_CH3_PKT_EAGER_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag, 
                                                      comm, context_offset );
	    /* If we're not complete, then add a reference to the datatype */
	    if (sreq && sreq->dev.OnDataAvail) {
		sreq->dev.datatype_ptr = dt_ptr;
		MPID_Datatype_add_ref(dt_ptr);
	    }
	}
    }
    else
    {
	/* Note that the sreq was created above */
	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm, 
                                     context_offset );
	/* FIXME: fill temporary IOV or pack temporary buffer after send to 
	   hide some latency.  This requires synchronization
           because the CTS packet could arrive and be processed before the 
	   above iStartmsg completes (depending on the progress
           engine, threads, etc.). */
	
	if (sreq && dt_ptr != NULL)
	{
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }

  fn_exit:
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,
    {
	if (sreq != NULL)
	{
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  );

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_ISEND);
    return mpi_errno;
}
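
The branch above chooses between the eager and rendezvous paths based on whether the payload plus the eager packet header fits under the per-connection threshold. A self-contained sketch of that decision follows; all sizes are made-up stand-ins, not actual MPICH values.

#include <stddef.h>
#include <stdio.h>

int main(void)
{
    size_t pkt_header_sz   = 48;          /* stand-in for sizeof(MPIDI_CH3_Pkt_eager_send_t) */
    size_t eager_threshold = 128 * 1024;  /* stand-in for the per-VC eager threshold */
    size_t data_sz         = 200 * 1024;  /* hypothetical message payload */

    if (data_sz + pkt_header_sz <= eager_threshold)
        printf("eager send of %zu bytes\n", data_sz);
    else
        printf("rendezvous (RTS/CTS) send of %zu bytes\n", data_sz);
    return 0;
}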
Example #11
int MPIDI_CH3_PktHandler_RndvClrToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
					MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &pkt->rndv_clr_to_send;
    MPID_Request * sreq;
    MPID_Request * rts_sreq;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_send_t * rs_pkt = &upkt.rndv_send;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPIDI_msg_sz_t data_sz;
    MPID_Datatype * dt_ptr;
    int mpi_errno = MPI_SUCCESS;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received rndv CTS pkt");
    
    MPID_Request_get_ptr(cts_pkt->sender_req_id, sreq);
    MPIU_DBG_PRINTF(("received cts, count=%d\n", sreq->dev.user_count));

    sreq->dev.OnDataAvail = 0;
    sreq->dev.OnFinal = 0;

    /* Release the RTS request if one exists.  
       MPID_Request_fetch_and_clear_rts_sreq() needs to be atomic to 
       prevent
       cancel send from cancelling the wrong (future) request.  
       If MPID_Request_fetch_and_clear_rts_sreq() returns a NULL
       rts_sreq, then MPID_Cancel_send() is responsible for releasing 
       the RTS request object. */
    MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq);
    if (rts_sreq != NULL)
    {
	MPID_Request_release(rts_sreq);
    }

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    MPIDI_Pkt_init(rs_pkt, MPIDI_CH3_PKT_RNDV_SEND);
    rs_pkt->receiver_req_id = cts_pkt->receiver_req_id;
    
    MPIDI_Datatype_get_info(sreq->dev.user_count, sreq->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    
    if (dt_contig) 
    {
	MPID_IOV iov[MPID_IOV_LIMIT];

	MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
		    "sending contiguous rndv data, data_sz=" MPIDI_MSG_SZ_FMT, 
					    data_sz));
	
	iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rs_pkt;
	iov[0].MPID_IOV_LEN = sizeof(*rs_pkt);
	
	iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)((char *)sreq->dev.user_buf + dt_true_lb);
	iov[1].MPID_IOV_LEN = data_sz;

        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIU_CALL(MPIDI_CH3,iSendv(vc, sreq, iov, 2));
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|senddata");
    }
    else
    {
	sreq->dev.segment_ptr = MPID_Segment_alloc( );
        MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
	MPID_Segment_init(sreq->dev.user_buf, sreq->dev.user_count, 
			  sreq->dev.datatype, sreq->dev.segment_ptr, 0);
	sreq->dev.segment_first = 0;
	sreq->dev.segment_size = data_sz;

	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = vc->sendNoncontig_fn(vc, sreq, rs_pkt, sizeof(*rs_pkt));
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|senddata");
    }    
    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
Example #12
/* MPIDI_CH3_RndvSend - Send a request to perform a rendezvous send */
int MPIDI_CH3_RndvSend( MPID_Request **sreq_p, const void * buf, int count, 
			MPI_Datatype datatype, int dt_contig, MPIDI_msg_sz_t data_sz, 
			MPI_Aint dt_true_lb,
			int rank, 
			int tag, MPID_Comm * comm, int context_offset )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * const rts_pkt = &upkt.rndv_req_to_send;
    MPIDI_VC_t * vc;
    MPID_Request * rts_sreq;
    MPID_Request *sreq =*sreq_p;
    int          mpi_errno = MPI_SUCCESS;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    /* needed by the seqnum macros used below */
    MPID_Seqnum_t seqnum;
#endif
	
    MPIU_DBG_MSG_D(CH3_OTHER,VERBOSE,
		   "sending rndv RTS, data_sz=" MPIDI_MSG_SZ_FMT, data_sz);

    sreq->dev.OnDataAvail = 0;
    
    sreq->partner_request = NULL;
	
    MPIDI_Pkt_init(rts_pkt, MPIDI_CH3_PKT_RNDV_REQ_TO_SEND);
    rts_pkt->match.parts.rank	      = comm->rank;
    rts_pkt->match.parts.tag	      = tag;
    rts_pkt->match.parts.context_id = comm->context_id + context_offset;
    rts_pkt->sender_req_id    = sreq->handle;
    rts_pkt->data_sz	      = data_sz;

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(rts_pkt, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

    MPIU_DBG_MSGPKT(vc,tag,rts_pkt->match.parts.context_id,rank,data_sz,"Rndv");

    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, rts_pkt, sizeof(*rts_pkt), 
					      &rts_sreq));
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS)
    {
	MPIU_Object_set_ref(sreq, 0);
	MPIDI_CH3_Request_destroy(sreq);
	*sreq_p = NULL;
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
    }
    /* --END ERROR HANDLING-- */
    if (rts_sreq != NULL)
    {
	if (rts_sreq->status.MPI_ERROR != MPI_SUCCESS)
	{
	    MPIU_Object_set_ref(sreq, 0);
	    MPIDI_CH3_Request_destroy(sreq);
	    *sreq_p = NULL;
            mpi_errno = rts_sreq->status.MPI_ERROR;
            MPID_Request_release(rts_sreq);
            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
	}
	MPID_Request_release(rts_sreq);
    }

    /* FIXME: fill temporary IOV or pack temporary buffer after send to hide 
       some latency.  This requires synchronization
       because the CTS packet could arrive and be processed before the above 
       iStartmsg completes (depending on the progress
       engine, threads, etc.). */

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #13
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
					MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPID_Request * rreq;
    int found;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    int mpi_errno = MPI_SUCCESS;
    
    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
 "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=" MPIDI_MSG_SZ_FMT,
	      rts_pkt->sender_req_id, rts_pkt->match.parts.rank, 
					rts_pkt->match.parts.tag, 
              rts_pkt->match.parts.context_id, rts_pkt->data_sz));
    MPIU_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
		    rts_pkt->match.parts.rank,rts_pkt->data_sz,
		    "ReceivedRndv");

    MPIU_THREAD_CS_ENTER(MSGQUEUE,);
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    MPIU_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());
    
    set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);

    MPIU_THREAD_CS_EXIT(MSGQUEUE,);

    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    
    if (found)
    {
	MPID_Request * cts_req;
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;
	
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"posted request found");
	
	/* FIXME: What if the receive user buffer is not big enough to
	   hold the data about to be cleared for sending? */
	
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending rndv CTS packet");
	MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
	cts_pkt->sender_req_id = rts_pkt->sender_req_id;
	cts_pkt->receiver_req_id = rreq->handle;
        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, cts_pkt, 
						  sizeof(*cts_pkt), &cts_req));
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				"**ch3|ctspkt");
	}
	if (cts_req != NULL) {
	    MPID_Request_release(cts_req);
	}
    }
    else
    {
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated");
	
	/*
	 * An MPID_Probe() may be waiting for the request we just 
	 * inserted, so we need to tell the progress engine to exit.
	 *
	 * FIXME: This will cause MPID_Progress_wait() to return to the
	 * MPI layer each time an unexpected RTS packet is
	 * received.  MPID_Probe() should atomically increment a
	 * counter and MPIDI_CH3_Progress_signal_completion()
	 * should only be called if that counter is greater than zero.
	 */
	MPIDI_CH3_Progress_signal_completion();
    }
    
    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
Example #14
/**
 * \brief The callback for a new RZV RTS
 * \note  Because this is a short message, the data is already received
 * \param[in]  context      The context on which the message is being received.
 * \param[in]  sender       The origin endpoint
 * \param[in]  _msginfo     The extended header information
 * \param[in]  msginfo_size The size of the extended header information
 * \param[in]  is_zero_byte The rendezvous message is zero bytes in length.
 */
void
MPIDI_RecvRzvCB_impl(pami_context_t    context,
                     pami_endpoint_t   sender,
                     const void      * _msginfo,
                     size_t            msginfo_size,
                     const unsigned    is_zero_byte)
{
  MPID_assert(_msginfo != NULL);
  MPID_assert(msginfo_size == sizeof(MPIDI_MsgEnvelope));
  const MPIDI_MsgEnvelope * envelope = (const MPIDI_MsgEnvelope *)_msginfo;
  const MPIDI_MsgInfo * msginfo = (const MPIDI_MsgInfo *)&envelope->msginfo;

  MPID_Request * rreq = NULL;
  int found;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  int  rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank       = msginfo->MPIrank;
  unsigned tag        = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  MPID_Request *newreq = MPIDI_Request_create2();
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, tag, context_id, &found);
#else
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, source, tag, context_id, msginfo->MPIseqno, &found);
#endif
  TRACE_ERR("RZV CB for req=%p remote-mr=0x%llx bytes=%zu (%sfound)\n",
            rreq,
            *(unsigned long long*)&envelope->envelope.memregion,
            envelope->envelope.length,
            found?"":"not ");

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG    = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, envelope->length);
  MPIDI_Request_setPeerRank_comm(rreq, rank);
  MPIDI_Request_setPeerRank_pami(rreq, source);
  MPIDI_Request_cpyPeerRequestH (rreq, msginfo);
  MPIDI_Request_setSync         (rreq, msginfo->isSync);
  MPIDI_Request_setRzv          (rreq, 1);

  /* ----------------------------------------------------- */
  /* Save the rendezvous information for when the target   */
  /* node calls a receive function and the data is         */
  /* retrieved from the origin node.                      */
  /* ----------------------------------------------------- */
  if (is_zero_byte)
    {
      rreq->mpid.envelope.length = 0;
      rreq->mpid.envelope.data   = NULL;
    }
  else
    {
#ifdef USE_PAMI_RDMA
      memcpy(&rreq->mpid.envelope.memregion,
             &envelope->memregion,
             sizeof(pami_memregion_t));
#else
      rreq->mpid.envelope.memregion_used = envelope->memregion_used;
      if(envelope->memregion_used)
        {
          memcpy(&rreq->mpid.envelope.memregion,
                 &envelope->memregion,
                 sizeof(pami_memregion_t));
        }
      rreq->mpid.envelope.data   = envelope->data;
#endif
      rreq->mpid.envelope.length = envelope->length;
     TRACE_SET_R_VAL(source,(rreq->mpid.idx),req,rreq);
     TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,envelope->length);
     TRACE_SET_R_VAL(source,(rreq->mpid.idx),fl.f.sync,msginfo->isSync);
     TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.rzv);
     if (TOKEN_FLOW_CONTROL_ON)
       {
         #if TOKEN_FLOW_CONTROL
         MPIDI_Must_return_tokens(context,source);
         #else
         MPID_assert_always(0);
         #endif
       }
    }
  /* ----------------------------------------- */
  /* figure out target buffer for request data */
  /* ----------------------------------------- */
  if (found)
    {
#if (MPIDI_STATISTICS)
       MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      /* --------------------------- */
      /* if synchronized, post ack.  */
      /* --------------------------- */
      if (unlikely(MPIDI_Request_isSync(rreq)))
        MPIDI_SyncAck_post(context, rreq, MPIDI_Request_getPeerRank_pami(rreq));

      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);

      if (is_zero_byte)
        MPIDI_RecvRzvDoneCB_zerobyte(context, rreq, PAMI_SUCCESS);
      else
        {
          MPIDI_RendezvousTransfer(context, rreq);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.sync_com_in_HH);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.matchedInHH);
          TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
        }
      MPID_Request_discard(newreq);
    }

  /* ------------------------------------------------------------- */
  /* Request was not posted. */
  /* ------------------------------------------------------------- */
  else
    {
#if (MPIDI_STATISTICS)
       MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      /*
       * This is to test that the fields don't need to be
       * initialized.  Remove after this doesn't fail for a while.
       */
      MPID_assert(rreq->mpid.uebuf    == NULL);
      MPID_assert(rreq->mpid.uebuflen == 0);
      /* rreq->mpid.uebuf = NULL; */
      /* rreq->mpid.uebuflen = 0; */
#ifdef OUT_OF_ORDER_HANDLING
  if (MPIDI_In_cntr[source].n_OutOfOrderMsgs > 0) {
     MPIDI_Recvq_process_out_of_order_msgs(source, context);
  }
#endif
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }
  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}