/*
 * MPIDI_CH3_RecvRndv - reply to a rendezvous request-to-send (RTS).
 *
 * Builds a clear-to-send (CTS) packet that carries the sender's request id
 * recorded in rreq together with rreq's own handle, and starts sending it on
 * vc while holding the CH3COMM critical section.
 *
 * vc   - virtual connection the CTS is sent on
 * rreq - receive request that matched the RTS
 *
 * Returns MPI_SUCCESS, or an MPI_ERR_OTHER code tagged "**ch3|ctspkt" when
 * the packet could not be started.
 */
int MPIDI_CH3_RecvRndv( MPIDI_VC_t * vc, MPID_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request * cts_sreq;
    MPIDI_CH3_Pkt_t pkt_storage;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * const cts = &pkt_storage.rndv_clr_to_send;

    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
                 "rndv RTS in the request, sending rndv CTS");

    /* Fill in the CTS: echo the sender's id, advertise ours. */
    MPIDI_Pkt_init(cts, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
    cts->sender_req_id   = rreq->dev.sender_req_id;
    cts->receiver_req_id = rreq->handle;

    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, cts, sizeof(*cts), &cts_sreq));
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);

    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|ctspkt");
    }

    /* FIXME: there is no way to tell iStartMsg that we do not want a request
       back, so drop the reference on the one it may have handed us. */
    if (cts_sreq) {
        MPID_Request_release(cts_sreq);
    }

 fn_fail:
    return mpi_errno;
}
/*
 * MPIDI_RecvShortCB - handle the arrival of a short message whose payload
 * (sndbuf, sndlen bytes) is delivered together with its header.
 *
 * context  - PAMI context the message arrived on
 * _msginfo - message header; must not be NULL (asserted) and is read as a
 *            const MPIDI_MsgInfo
 * sndbuf   - payload bytes
 * sndlen   - payload length in bytes
 * sender   - PAMI endpoint of the sender
 * isSync   - non-zero when the send is synchronous; a sync ack is then posted
 *            for a matched receive
 *
 * The function looks for a posted receive matching (rank, tag, context id).
 * If none is found it builds an unexpected-message request instead; if one is
 * found the data is copied straight into the user buffer (or handed to the
 * user-defined-datatype / truncation helpers) and the request is completed.
 * All queue accesses are bracketed by the MSGQUEUE critical section.
 */
static inline void MPIDI_RecvShortCB(pami_context_t context, const void * _msginfo, const void * sndbuf, size_t sndlen, pami_endpoint_t sender, unsigned isSync)
{
  MPID_assert(_msginfo != NULL);
  const MPIDI_MsgInfo *msginfo = (const MPIDI_MsgInfo *)_msginfo;
  MPID_Request * rreq = NULL;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  /* NOTE(review): not referenced directly below; possibly consumed by the
     token macros -- confirm before removing. */
  int rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank = msginfo->MPIrank;
  unsigned tag = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
  rreq = MPIDI_Recvq_FDP(rank, source, tag, context_id, msginfo->MPIseqno);
#endif

  /* Match not found */
  if (unlikely(rreq == NULL))
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      /* The critical section is released while the new request and its
         early-arrival buffer are allocated. */
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
      MPID_Request *newreq = MPIDI_Request_create2();
      MPID_assert(newreq != NULL);
      if (sndlen)
        {
          newreq->mpid.uebuflen = sndlen;
          if (!TOKEN_FLOW_CONTROL_ON)
            {
              newreq->mpid.uebuf = MPL_malloc(sndlen);
              newreq->mpid.uebuf_malloc = mpiuMalloc;
            }
          else
            {
#if TOKEN_FLOW_CONTROL
              /* The flow-control allocator is called under MSGQUEUE. */
              MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
              newreq->mpid.uebuf = MPIDI_mm_alloc(sndlen);
              newreq->mpid.uebuf_malloc = mpidiBufMM;
              MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#else
              MPID_assert_always(0);
#endif
            }
          MPID_assert(newreq->mpid.uebuf != NULL);
        }
      /* Search again after re-entering the critical section -- presumably
         because a matching receive may have been posted while the lock was
         dropped above. */
      MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
#ifndef OUT_OF_ORDER_HANDLING
      rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
      rreq = MPIDI_Recvq_FDP(rank, PAMIX_Endpoint_query(sender), tag, context_id, msginfo->MPIseqno);
#endif

      if (unlikely(rreq == NULL))
        {
          /* Still unmatched: hand the message to the unexpected path. */
          MPIDI_Callback_process_unexp(newreq, context, msginfo, sndlen, sender, sndbuf, NULL, isSync);
          /* request is always complete now */
          if (TOKEN_FLOW_CONTROL_ON && sndlen)
            {
#if TOKEN_FLOW_CONTROL
              MPIDI_Token_cntr[source].unmatched++;
#else
              MPID_assert_always(0);
#endif
            }
          MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
          MPID_Request_release(newreq);
          goto fn_exit_short;
        }
      else
        {
          /* A match showed up on the second search; the freshly created
             request is not needed. */
          MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
          MPID_Request_discard(newreq);
        }
    }
  else
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      if (TOKEN_FLOW_CONTROL_ON && sndlen)
        {
#if TOKEN_FLOW_CONTROL
          MPIDI_Update_rettoks(source);
          MPIDI_Must_return_tokens(context,source);
#else
          MPID_assert_always(0);
#endif
        }
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }

  /* the receive queue processing has been completed and we found match*/

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG = tag;
  /* The status count is set from the full incoming length, before any
     truncation adjustment made further down. */
  MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
  MPIDI_Request_setCA (rreq, MPIDI_CA_COMPLETE);
  MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
  MPIDI_Request_setSync (rreq, isSync);
  MPIDI_Request_setRzv (rreq, 0);

  /* ----------------------------- */
  /*  Request was already posted.  */
  /* ----------------------------- */
  if (unlikely(isSync))
    MPIDI_SyncAck_post(context, rreq, PAMIX_Endpoint_query(sender));

  /* Non-builtin datatypes are delegated to a dedicated helper. */
  if (unlikely(HANDLE_GET_KIND(rreq->mpid.datatype) != HANDLE_KIND_BUILTIN))
    {
      MPIDI_Callback_process_userdefined_dt(context, sndbuf, sndlen, rreq);
      goto fn_exit_short;
    }

  /* Receive buffer capacity in bytes: element count times basic type size. */
  size_t dt_size = rreq->mpid.userbufcount * MPID_Datatype_get_basic_size(rreq->mpid.datatype);

  /* ----------------------------- */
  /*  Test for truncated message.  */
  /* ----------------------------- */
  if (unlikely(sndlen > dt_size))
    {
#if ASSERT_LEVEL > 0
      MPIDI_Callback_process_trunc(context, rreq, NULL, sndbuf);
      goto fn_exit_short;
#else
      /* With ASSERT_LEVEL 0 the copy is clamped to the buffer size and no
         truncation helper is invoked. */
      sndlen = dt_size;
#endif
    }

  MPID_assert(rreq->mpid.uebuf == NULL);
  MPID_assert(rreq->mpid.uebuflen == 0);
  void* rcvbuf = rreq->mpid.userbuf;

  if (sndlen > 0)
    {
#if CUDA_AWARE_SUPPORT
      if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
        {
          /* NOTE(review): the result stored in cudaerr is never examined. */
          cudaError_t cudaerr = CudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
        }
      else
#endif
        memcpy(rcvbuf, sndbuf, sndlen);
    }
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,sndlen);
  TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.comp_in_HH);
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
  MPIDI_Request_complete(rreq);

 fn_exit_short:
#ifdef OUT_OF_ORDER_HANDLING
  /* Process any out-of-order messages recorded for this source. */
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  if (MPIDI_In_cntr[source].n_OutOfOrderMsgs>0)  {
    MPIDI_Recvq_process_out_of_order_msgs(source, context);
  }
  MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#endif

  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}
/*@ MPI_Init_thread - Initialize the MPI execution environment Input Parameters: + argc - Pointer to the number of arguments . argv - Pointer to the argument vector - required - Level of desired thread support Output Parameter: . provided - Level of provided thread support Command line arguments: MPI specifies no command-line arguments but does allow an MPI implementation to make use of them. See 'MPI_INIT' for a description of the command line arguments supported by 'MPI_INIT' and 'MPI_INIT_THREAD'. Notes: The valid values for the level of thread support are\: + MPI_THREAD_SINGLE - Only one thread will execute. . MPI_THREAD_FUNNELED - The process may be multi-threaded, but only the main thread will make MPI calls (all MPI calls are funneled to the main thread). . MPI_THREAD_SERIALIZED - The process may be multi-threaded, and multiple threads may make MPI calls, but only one at a time: MPI calls are not made concurrently from two distinct threads (all MPI calls are serialized). - MPI_THREAD_MULTIPLE - Multiple threads may call MPI, with no restrictions. Notes for Fortran: Note that the Fortran binding for this routine does not have the 'argc' and 'argv' arguments. 
('MPI_INIT_THREAD(required, provided, ierror)') .N Errors .N MPI_SUCCESS .N MPI_ERR_OTHER .seealso: MPI_Init, MPI_Finalize @*/ int MPI_Init_thread( int *argc, char ***argv, int required, int *provided ) { int mpi_errno = MPI_SUCCESS; int rc, reqd = required; MPID_MPI_INIT_STATE_DECL(MPID_STATE_MPI_INIT_THREAD); rc = MPID_Wtime_init(); #ifdef USE_DBG_LOGGING MPIU_DBG_PreInit( argc, argv, rc ); #endif MPID_MPI_INIT_FUNC_ENTER(MPID_STATE_MPI_INIT_THREAD); #if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) MV2_Read_env_vars(); #endif /* defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */ # ifdef HAVE_ERROR_CHECKING { MPID_BEGIN_ERROR_CHECKS; { if (MPIR_Process.initialized != MPICH_PRE_INIT) { mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPI_Init_thread", __LINE__, MPI_ERR_OTHER, "**inittwice", 0 ); } if (mpi_errno != MPI_SUCCESS) goto fn_fail; } MPID_END_ERROR_CHECKS; } # endif /* HAVE_ERROR_CHECKING */ /* ... body of routine ... */ /* If the user requested for asynchronous progress, request for * THREAD_MULTIPLE. */ rc = 0; MPL_env2bool("MPICH_ASYNC_PROGRESS", &rc); if (rc) reqd = MPI_THREAD_MULTIPLE; mpi_errno = MPIR_Init_thread( argc, argv, reqd, provided ); if (mpi_errno != MPI_SUCCESS) goto fn_fail; if (rc && *provided == MPI_THREAD_MULTIPLE) { mpi_errno = MPIR_Init_async_thread(); if (mpi_errno) goto fn_fail; MPIR_async_thread_initialized = 1; } /* ... end of body of routine ... */ MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT_THREAD); return mpi_errno; fn_fail: /* --BEGIN ERROR HANDLING-- */ # ifdef HAVE_ERROR_REPORTING { mpi_errno = MPIR_Err_create_code( mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_init_thread", "**mpi_init_thread %p %p %d %p", argc, argv, required, provided); } # endif mpi_errno = MPIR_Err_return_comm( 0, FCNAME, mpi_errno ); MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT_THREAD); MPIU_THREAD_CS_EXIT(INIT,*provided); return mpi_errno; /* --END ERROR HANDLING-- */ }
int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided) { int mpi_errno = MPI_SUCCESS; int has_args; int has_env; int thread_provided; int exit_init_cs_on_failure = 0; /* For any code in the device that wants to check for runtime decisions on the value of isThreaded, set a provisional value here. We could let the MPID_Init routine override this */ #ifdef HAVE_RUNTIME_THREADCHECK MPIR_ThreadInfo.isThreaded = required == MPI_THREAD_MULTIPLE; #endif MPIU_THREAD_CS_INIT; /* FIXME: Move to os-dependent interface? */ #ifdef HAVE_WINDOWS_H /* prevent the process from bringing up an error message window if mpich asserts */ _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE ); _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR ); _CrtSetReportHook2(_CRT_RPTHOOK_INSTALL, assert_hook); #ifdef _WIN64 { /* FIXME: (Windows) This severly degrades performance but fixes alignment issues with the datatype code. */ /* Prevent misaligned faults on Win64 machines */ UINT mode, old_mode; old_mode = SetErrorMode(SEM_NOALIGNMENTFAULTEXCEPT); mode = old_mode | SEM_NOALIGNMENTFAULTEXCEPT; SetErrorMode(mode); } #endif #endif /* We need this inorder to implement IS_THREAD_MAIN */ # if (MPICH_THREAD_LEVEL >= MPI_THREAD_SERIALIZED) && defined(MPICH_IS_THREADED) { MPID_Thread_self(&MPIR_ThreadInfo.master_thread); } # endif #ifdef HAVE_ERROR_CHECKING /* Because the PARAM system has not been initialized, temporarily uncondtionally enable error checks. Once the PARAM system is initialized, this may be reset */ MPIR_Process.do_error_checks = 1; #else MPIR_Process.do_error_checks = 0; #endif /* Initialize necessary subsystems and setup the predefined attribute values. Subsystems may change these values. 
*/ MPIR_Process.attrs.appnum = -1; MPIR_Process.attrs.host = 0; MPIR_Process.attrs.io = 0; MPIR_Process.attrs.lastusedcode = MPI_ERR_LASTCODE; MPIR_Process.attrs.tag_ub = 0; MPIR_Process.attrs.universe = MPIR_UNIVERSE_SIZE_NOT_SET; MPIR_Process.attrs.wtime_is_global = 0; /* Set the functions used to duplicate attributes. These are when the first corresponding keyval is created */ MPIR_Process.attr_dup = 0; MPIR_Process.attr_free = 0; #ifdef HAVE_CXX_BINDING /* Set the functions used to call functions in the C++ binding for reductions and attribute operations. These are null until a C++ operation is defined. This allows the C code that implements these operations to not invoke a C++ code directly, which may force the inclusion of symbols known only to the C++ compiler (e.g., under more non-GNU compilers, including Solaris and IRIX). */ MPIR_Process.cxx_call_op_fn = 0; #endif /* This allows the device to select an alternative function for dimsCreate */ MPIR_Process.dimsCreate = 0; /* "Allocate" from the reserved space for builtin communicators and (partially) initialize predefined communicators. comm_parent is intially NULL and will be allocated by the device if the process group was started using one of the MPI_Comm_spawn functions. 
*/ MPIR_Process.comm_world = MPID_Comm_builtin + 0; MPIR_Comm_init(MPIR_Process.comm_world); MPIR_Process.comm_world->handle = MPI_COMM_WORLD; MPIR_Process.comm_world->context_id = 0 << MPID_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_world->recvcontext_id = 0 << MPID_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_world->comm_kind = MPID_INTRACOMM; /* This initialization of the comm name could be done only when comm_get_name is called */ MPIU_Strncpy(MPIR_Process.comm_world->name, "MPI_COMM_WORLD", MPI_MAX_OBJECT_NAME); MPIR_Process.comm_self = MPID_Comm_builtin + 1; MPIR_Comm_init(MPIR_Process.comm_self); MPIR_Process.comm_self->handle = MPI_COMM_SELF; MPIR_Process.comm_self->context_id = 1 << MPID_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_self->recvcontext_id = 1 << MPID_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_self->comm_kind = MPID_INTRACOMM; MPIU_Strncpy(MPIR_Process.comm_self->name, "MPI_COMM_SELF", MPI_MAX_OBJECT_NAME); #ifdef MPID_NEEDS_ICOMM_WORLD MPIR_Process.icomm_world = MPID_Comm_builtin + 2; MPIR_Comm_init(MPIR_Process.icomm_world); MPIR_Process.icomm_world->handle = MPIR_ICOMM_WORLD; MPIR_Process.icomm_world->context_id = 2 << MPID_CONTEXT_PREFIX_SHIFT; MPIR_Process.icomm_world->recvcontext_id= 2 << MPID_CONTEXT_PREFIX_SHIFT; MPIR_Process.icomm_world->comm_kind = MPID_INTRACOMM; MPIU_Strncpy(MPIR_Process.icomm_world->name, "MPI_ICOMM_WORLD", MPI_MAX_OBJECT_NAME); /* Note that these communicators are not ready for use - MPID_Init will setup self and world, and icomm_world if it desires it. 
*/ #endif MPIR_Process.comm_parent = NULL; /* Setup the initial communicator list in case we have enabled the debugger message-queue interface */ MPIR_COMML_REMEMBER( MPIR_Process.comm_world ); MPIR_COMML_REMEMBER( MPIR_Process.comm_self ); /* Call any and all MPID_Init type functions */ MPIR_Err_init(); MPIR_Datatype_init(); MPIR_Group_init(); /* MPIU_Timer_pre_init(); */ mpi_errno = MPIR_Param_init_params(); if (mpi_errno) MPIU_ERR_POP(mpi_errno); /* Wait for debugger to attach if requested. */ if (MPIR_PARAM_DEBUG_HOLD) { volatile int hold = 1; while (hold) #ifdef HAVE_USLEEP usleep(100); #endif ; } #if HAVE_ERROR_CHECKING == MPID_ERROR_LEVEL_RUNTIME MPIR_Process.do_error_checks = MPIR_PARAM_ERROR_CHECKING; #endif /* define MPI as initialized so that we can use MPI functions within MPID_Init if necessary */ MPIR_Process.initialized = MPICH_WITHIN_MPI; /* We can't acquire any critical sections until this point. Any * earlier the basic data structures haven't been initialized */ MPIU_THREAD_CS_ENTER(INIT,required); exit_init_cs_on_failure = 1; mpi_errno = MPID_Init(argc, argv, required, &thread_provided, &has_args, &has_env); if (mpi_errno) MPIU_ERR_POP(mpi_errno); /* Capture the level of thread support provided */ MPIR_ThreadInfo.thread_provided = thread_provided; if (provided) *provided = thread_provided; #ifdef HAVE_RUNTIME_THREADCHECK MPIR_ThreadInfo.isThreaded = (thread_provided == MPI_THREAD_MULTIPLE); #endif /* FIXME: Define these in the interface. Does Timer init belong here? 
*/ MPIU_dbg_init(MPIR_Process.comm_world->rank); MPIU_Timer_init(MPIR_Process.comm_world->rank, MPIR_Process.comm_world->local_size); #ifdef USE_MEMORY_TRACING MPIU_trinit( MPIR_Process.comm_world->rank ); /* Indicate that we are near the end of the init step; memory allocated already will have an id of zero; this helps separate memory leaks in the initialization code from leaks in the "active" code */ /* Uncomment this code to leave out any of the MPID_Init/etc memory allocations from the memory leak testing */ /* MPIU_trid( 1 ); */ #endif #ifdef USE_DBG_LOGGING MPIU_DBG_Init( argc, argv, has_args, has_env, MPIR_Process.comm_world->rank ); #endif /* Initialize the C versions of the Fortran link-time constants. We now initialize the Fortran symbols from within the Fortran interface in the routine that first needs the symbols. This fixes a problem with symbols added by a Fortran compiler that are not part of the C runtime environment (the Portland group compilers would do this) */ #if defined(HAVE_FORTRAN_BINDING) && defined(HAVE_MPI_F_INIT_WORKS_WITH_C) mpirinitf_(); #endif /* FIXME: Does this need to come before the call to MPID_InitComplete? 
For some debugger support, MPIR_WaitForDebugger may want to use MPI communication routines to collect information for the debugger */ #ifdef HAVE_DEBUGGER_SUPPORT MPIR_WaitForDebugger(); #endif /* Let the device know that the rest of the init process is completed */ if (mpi_errno == MPI_SUCCESS) mpi_errno = MPID_InitCompleted(); #if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) if (is_shmem_collectives_enabled()){ if (check_split_comm(pthread_self())){ int my_id, size; PMPI_Comm_rank(MPI_COMM_WORLD, &my_id); PMPI_Comm_size(MPI_COMM_WORLD, &size); disable_split_comm(pthread_self()); create_2level_comm(MPI_COMM_WORLD, size, my_id); enable_split_comm(pthread_self()); } } #endif /* defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */ fn_exit: MPIU_THREAD_CS_EXIT(INIT,required); return mpi_errno; fn_fail: /* --BEGIN ERROR HANDLING-- */ /* signal to error handling routines that core services are unavailable */ MPIR_Process.initialized = MPICH_PRE_INIT; if (exit_init_cs_on_failure) { MPIU_THREAD_CS_EXIT(INIT,required); } MPIU_THREAD_CS_FINALIZE; return mpi_errno; /* --END ERROR HANDLING-- */ }
int MPID_nem_newmad_poll(int in_blocking_poll) { nm_sr_request_t *p_request = NULL; nm_tag_t match_info = 0; int mpi_errno = MPI_SUCCESS; nm_sr_send_success(mpid_nem_newmad_session, &p_request); if (p_request != NULL) { MPID_nem_newmad_unified_req_t *ref; MPID_Request *req; MPID_Request_kind_t kind; MPIR_Context_id_t ctxt; nm_sr_get_stag(mpid_nem_newmad_session,p_request, &match_info); NEM_NMAD_MATCH_GET_CTXT(match_info, ctxt); nm_sr_get_ref(mpid_nem_newmad_session,p_request,(void *)&ref); req = &(ref->mpi_req); MPIU_Assert(req != NULL); kind = req->kind; if(ctxt == NEM_NMAD_INTRA_CTXT) { if ((kind == MPID_REQUEST_SEND) || (kind == MPID_PREQUEST_SEND)) { MPID_nem_newmad_handle_sreq(req); } } else { if ((kind == MPID_REQUEST_SEND) || (kind == MPID_PREQUEST_SEND)) { MPIU_Assert(MPIDI_Request_get_type(req) != MPIDI_REQUEST_TYPE_GET_RESP); MPID_nem_newmad_handle_sreq(req); } } } nm_sr_recv_success(mpid_nem_newmad_session, &p_request); if (p_request != NULL) { MPID_nem_newmad_unified_req_t *ref; MPID_Request *req; MPID_Request_kind_t kind; MPIR_Context_id_t ctxt; size_t size; nm_sr_get_ref(mpid_nem_newmad_session,p_request,(void *)&ref); req = &(ref->mpi_req); MPIU_Assert(req != NULL); kind = req->kind; nm_sr_get_size(mpid_nem_newmad_session, p_request, &size); nm_sr_get_rtag(mpid_nem_newmad_session,p_request, &match_info); NEM_NMAD_MATCH_GET_CTXT(match_info, ctxt); if(ctxt == NEM_NMAD_INTRA_CTXT) { MPID_nem_newmad_internal_req_t *adi_req = &(ref->nem_newmad_req); if (kind == MPID_REQUEST_RECV) { if (size <= sizeof(MPIDI_CH3_PktGeneric_t)) { MPID_nem_handle_pkt(adi_req->vc,(char *)&(adi_req->pending_pkt),(MPIDI_msg_sz_t)(size)); } else { MPID_nem_handle_pkt(adi_req->vc,(char *)(adi_req->tmpbuf),(MPIDI_msg_sz_t)(adi_req->tmpbuf_sz)); MPIU_Free(adi_req->tmpbuf); } nm_core_disable_progression(mpid_nem_newmad_session->p_core); MPID_nem_newmad_internal_req_enqueue(adi_req); nm_core_enable_progression(mpid_nem_newmad_session->p_core); } else { MPIU_Assert(0); } } else { if 
((kind == MPID_REQUEST_RECV) || (kind == MPID_PREQUEST_RECV)) { int found = FALSE; nm_sr_request_t *nmad_request = NULL; MPIU_Assert(MPIDI_Request_get_type(req) != MPIDI_REQUEST_TYPE_GET_RESP); MPIU_THREAD_CS_ENTER(MSGQUEUE,req); MPID_NEM_NMAD_GET_REQ_FROM_HASH(req,nmad_request); if(nmad_request != NULL) { MPIU_Assert(req->dev.match.parts.rank == MPI_ANY_SOURCE); MPIU_Free(nmad_request); } found = MPIDI_CH3U_Recvq_DP(req); if(found){ MPID_nem_newmad_handle_rreq(req,match_info,size); } MPIU_THREAD_CS_EXIT(MSGQUEUE,req); } else { fprintf(stdout, ">>>>>>>>>>>>> ERROR: Wrong req type : %i (%p)\n",(int)kind,req); MPIU_Assert(0); } } } fn_exit: return mpi_errno; fn_fail: ATTRIBUTE((unused)) goto fn_exit; }
int MPID_Isend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request) { MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype * dt_ptr; MPID_Request * sreq; MPIDI_VC_t * vc=0; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND); MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) { #if defined (_OSU_PSM_) goto skip_self_send; /* psm will internally do self-send, no special handling is needed here */ #endif /* _OSU_PSM_ */ mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq); goto fn_exit; } #if defined (_OSU_PSM_) skip_self_send: #endif if (rank != MPI_PROC_NULL) { MPIDI_Comm_get_vc_set_active(comm, rank, &vc); #ifdef ENABLE_COMM_OVERRIDES /* this needs to come before the sreq is created, since the override * function is responsible for creating its own request */ if (vc->comm_ops && vc->comm_ops->isend) { mpi_errno = vc->comm_ops->isend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif } MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); if (rank == MPI_PROC_NULL) { MPIU_Object_set_ref(sreq, 1); MPID_cc_set(&sreq->cc, 0); goto fn_exit; } MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); if (data_sz == 0) { #if defined (_OSU_PSM_) goto eager_send; #endif /* _OSU_PSM_ */ MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send; MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG); sreq->dev.OnDataAvail = 0; MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message"); 
MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND); eager_pkt->match.parts.rank = comm->rank; eager_pkt->match.parts.tag = tag; eager_pkt->match.parts.context_id = comm->context_id + context_offset; eager_pkt->sender_req_id = sreq->handle; eager_pkt->data_sz = 0; MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(eager_pkt, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); MPIU_THREAD_CS_ENTER(CH3COMM,vc); mpi_errno = MPIU_CALL(MPIDI_CH3,iSend(vc, sreq, eager_pkt, sizeof(*eager_pkt))); MPIU_THREAD_CS_EXIT(CH3COMM,vc); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { MPIU_Object_set_ref(sreq, 0); MPIDI_CH3_Request_destroy(sreq); sreq = NULL; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg"); goto fn_exit; } /* --END ERROR HANDLING-- */ goto fn_exit; } #if defined (_OSU_PSM_) if(HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) { sreq->dev.datatype_ptr = dt_ptr; MPID_Datatype_add_ref(dt_ptr); sreq->psm_flags |= PSM_NEED_DTYPE_RELEASE; } if(vc->force_eager) goto eager_send; #endif /* _OSU_PSM_ */ #if defined(_OSU_MVAPICH_) int i; for (i = 0 ; i < rdma_num_extra_polls; i++) { if (rdma_global_ext_sendq_size > 1) MPID_Progress_test(); } #endif /* FIXME: flow control: limit number of outstanding eager messsages containing data and need to be buffered by the receiver */ #if defined(_OSU_MVAPICH_) if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= vc->eager_max_msg_sz && !vc->force_rndv) #else /* defined(_OSU_MVAPICH_) */ if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= vc->eager_max_msg_sz) #endif /* defined(_OSU_MVAPICH_) */ { #if defined (_OSU_PSM_) eager_send: #endif /* _OSU_PSM */ if (dt_contig) { mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, (char*)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else { #if defined (_OSU_PSM_) sreq->psm_flags |= PSM_NON_BLOCKING_SEND; #endif mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, buf, count, datatype, data_sz, rank, tag, 
comm, context_offset ); #if defined (_OSU_PSM_) goto fn_exit; #endif /* If we're not complete, then add a reference to the datatype */ if (sreq && sreq->dev.OnDataAvail) { sreq->dev.datatype_ptr = dt_ptr; MPID_Datatype_add_ref(dt_ptr); } } } else { /* Note that the sreq was created above */ MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG ); mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig, data_sz, dt_true_lb, rank, tag, comm, context_offset ); /* FIXME: fill temporary IOV or pack temporary buffer after send to hide some latency. This requires synchronization because the CTS packet could arrive and be processed before the above iStartmsg completes (depending on the progress engine, threads, etc.). */ #if defined(_OSU_MVAPICH_) /* rndv transfers need to process CTS packet to initiate the actual RDMA transfer */ MPID_Progress_test(); #endif /* defined(_OSU_MVAPICH_) */ if (sreq && dt_ptr != NULL) { sreq->dev.datatype_ptr = dt_ptr; MPID_Datatype_add_ref(dt_ptr); } } fn_exit: *request = sreq; #if defined(_OSU_MVAPICH_) for (i = 0 ; i < rdma_num_extra_polls; i++) { if (rdma_global_ext_sendq_size > 1) MPID_Progress_test(); } #endif MPIU_DBG_STMT(CH3_OTHER,VERBOSE, { if (sreq != NULL) { MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle); } } );
/*@
   MPI_Win_get_errhandler - Get the error handler for the MPI RMA window

Input Parameters:
. win - window (handle)

Output Parameters:
. errhandler - error handler currently associated with window (handle)

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_WIN
.N MPI_ERR_OTHER
@*/
int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler)
{
#ifdef HAVE_ERROR_CHECKING
    static const char FCNAME[] = "MPI_Win_get_errhandler";
#endif
    int mpi_errno = MPI_SUCCESS;
    MPID_Win *win_ptr = NULL;
    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_GET_ERRHANDLER);

    MPIR_ERRTEST_INITIALIZED_ORDIE();

    MPIU_THREAD_CS_ENTER(ALLFUNC,);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_GET_ERRHANDLER);

    /* First pass of checks: the window handle itself, before it is turned
       into an object pointer. */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_WIN(win, mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif

    /* Handle -> object pointer */
    MPID_Win_get_ptr( win, win_ptr );

    /* Second pass of checks: the output argument and the converted
       window object. */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_ARGNULL(errhandler,"errhandler",mpi_errno);
            /* An invalid win_ptr is reset to null by the validation */
            MPID_Win_valid_ptr( win_ptr, mpi_errno );
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    MPIU_THREAD_CS_ENTER(MPI_OBJ, win_ptr);
    if (!win_ptr->errhandler) {
        /* Nothing attached to the window: report the default handler */
        *errhandler = MPI_ERRORS_ARE_FATAL;
    }
    else {
        /* Hand back the attached handler and take a reference on it */
        *errhandler = win_ptr->errhandler->handle;
        MPIR_Errhandler_add_ref(win_ptr->errhandler);
    }
    MPIU_THREAD_CS_EXIT(MPI_OBJ, win_ptr);

    /* ... end of body of routine ... */

#ifdef HAVE_ERROR_CHECKING
  fn_exit:
#endif
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
  fn_fail:
    {
        mpi_errno = MPIR_Err_create_code(
            mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
            "**mpi_win_get_errhandler",
            "**mpi_win_get_errhandler %W %p", win, errhandler);
    }
    mpi_errno = MPIR_Err_return_win(win_ptr, FCNAME, mpi_errno);
    goto fn_exit;
#   endif
    /* --END ERROR HANDLING-- */
}
int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request) { MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_ready_send_t * const ready_pkt = &upkt.ready_send; MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype * dt_ptr; MPID_Request * sreq; MPIDI_VC_t * vc; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_IRSEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRSEND); MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_RSEND, &sreq); goto fn_exit; } MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND); MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG); if (rank == MPI_PROC_NULL) { MPIU_Object_set_ref(sreq, 1); MPID_cc_set(&sreq->cc, 0); goto fn_exit; } MPIDI_Comm_get_vc_set_active(comm, rank, &vc); #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->irsend) { mpi_errno = vc->comm_ops->irsend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); MPIDI_Pkt_init(ready_pkt, MPIDI_CH3_PKT_READY_SEND); ready_pkt->match.parts.rank = comm->rank; ready_pkt->match.parts.tag = tag; ready_pkt->match.parts.context_id = comm->context_id + context_offset; ready_pkt->sender_req_id = MPI_REQUEST_NULL; ready_pkt->data_sz = data_sz; if (data_sz == 0) { MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message"); sreq->dev.OnDataAvail = 0; MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(ready_pkt, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); 
MPIU_THREAD_CS_ENTER(CH3COMM,vc); mpi_errno = MPIDI_CH3_iSend(vc, sreq, ready_pkt, sizeof(*ready_pkt)); MPIU_THREAD_CS_EXIT(CH3COMM,vc); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { MPIU_Object_set_ref(sreq, 0); MPIDI_CH3_Request_destroy(sreq); sreq = NULL; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg"); goto fn_exit; } /* --END ERROR HANDLING-- */ goto fn_exit; } if (vc->ready_eager_max_msg_sz < 0 || data_sz + sizeof(MPIDI_CH3_Pkt_ready_send_t) <= vc->ready_eager_max_msg_sz) { if (dt_contig) { mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, MPIDI_CH3_PKT_READY_SEND, (char*)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else { mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, MPIDI_CH3_PKT_READY_SEND, buf, count, datatype, data_sz, rank, tag, comm, context_offset ); /* If we're not complete, then add a reference to the datatype */ if (sreq && sreq->dev.OnDataAvail) { sreq->dev.datatype_ptr = dt_ptr; MPID_Datatype_add_ref(dt_ptr); } } } else { /* Do rendezvous. This will be sent as a regular send not as a ready send, so the receiver won't know to send an error if the receive has not been posted */ MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG ); mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig, data_sz, dt_true_lb, rank, tag, comm, context_offset ); if (sreq && dt_ptr != NULL) { sreq->dev.datatype_ptr = dt_ptr; MPID_Datatype_add_ref(dt_ptr); } } fn_exit: *request = sreq; MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{ if (sreq != NULL) { MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle); } } );
int MPID_Send(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request) { MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype * dt_ptr; MPID_Request * sreq = NULL; MPIDI_VC_t * vc; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int eager_threshold = -1; int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_SEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND); MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); /* Check to make sure the communicator hasn't already been revoked */ if (comm->revoked && MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BIT(tag & ~MPIR_Process.tagged_coll_mask) && MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BIT(tag & ~MPIR_Process.tagged_coll_mask)) { MPIU_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked"); } if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq); /* In the single threaded case, sending to yourself will cause deadlock. 
Note that in the runtime-thread case, this check will not be made (long-term FIXME) */ # ifndef MPICH_IS_THREADED { if (sreq != NULL && sreq->cc != 0) { MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**dev|selfsenddeadlock"); } } # endif if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); } goto fn_exit; } if (rank == MPI_PROC_NULL) { goto fn_exit; } MPIDI_Comm_get_vc_set_active(comm, rank, &vc); #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->send) { mpi_errno = vc->comm_ops->send( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); if (data_sz == 0) { MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send; MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message"); MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND); eager_pkt->match.parts.rank = comm->rank; eager_pkt->match.parts.tag = tag; eager_pkt->match.parts.context_id = comm->context_id + context_offset; eager_pkt->sender_req_id = MPI_REQUEST_NULL; eager_pkt->data_sz = 0; MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(eager_pkt, seqnum); MPIU_THREAD_CS_ENTER(CH3COMM,vc); mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt), &sreq); MPIU_THREAD_CS_EXIT(CH3COMM,vc); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg"); } /* --END ERROR HANDLING-- */ if (sreq != NULL) { MPIDI_Request_set_seqnum(sreq, seqnum); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); /* sreq->comm = comm; MPIR_Comm_add_ref(comm); -- not necessary for blocking functions */ } goto fn_exit; } MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc); /* FIXME: flow control: limit number of outstanding eager messages containing data and need to be buffered by the receiver */ #ifdef USE_EAGER_SHORT if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) { mpi_errno = 
MPIDI_CH3_EagerContigShortSend( &sreq, MPIDI_CH3_PKT_EAGERSHORT_SEND, (char *)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else #endif if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold) { if (dt_contig) { mpi_errno = MPIDI_CH3_EagerContigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, (char *)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else { MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, buf, count, datatype, data_sz, rank, tag, comm, context_offset ); } } else {
/*
 * MPID_Isend - start a nonblocking send and hand the send request back to
 * the caller through *request.
 *
 * Parameters:
 *   buf, count, datatype - description of the user data to send
 *   rank                 - destination rank in comm (may be MPI_PROC_NULL)
 *   tag                  - message tag placed in the match header
 *   comm                 - communicator; comm->context_id + context_offset is
 *                          used as the matching context id
 *   context_offset       - offset added to the communicator's context id
 *   request              - [out] receives the send request (may be set to
 *                          NULL on the zero-length error path below)
 *
 * Paths taken, in order:
 *   1. rank is this process on a non-intercommunicator: MPIDI_Isend_self().
 *   2. ENABLE_COMM_OVERRIDES and the VC provides comm_ops->isend: delegate.
 *   3. rank == MPI_PROC_NULL: return a request with ref count 1 and
 *      completion counter 0.
 *   4. data_sz == 0: send a bare eager header with MPIDI_CH3_iSend().
 *   5. data_sz + eager header size <= eager threshold: eager send
 *      (contiguous or noncontiguous variant).
 *   6. otherwise: rendezvous through vc->rndvSend_fn.
 *
 * NOTE(review): the tail of this function (everything after the debug
 * statement at fn_exit) is not present here; the return value is presumably
 * mpi_errno - confirm against the complete source.
 */
int MPID_Isend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc=0;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset));

    /* Send to self (never applies to intercommunicators). */
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) {
        mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq);
        goto fn_exit;
    }

    /* vc stays 0 for MPI_PROC_NULL; that case leaves before vc is used. */
    if (rank != MPI_PROC_NULL) {
        MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#ifdef ENABLE_COMM_OVERRIDES
        /* this needs to come before the sreq is created, since the override
         * function is responsible for creating its own request */
        if (vc->comm_ops && vc->comm_ops->isend) {
            mpi_errno = vc->comm_ops->isend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
            goto fn_exit;
        }
#endif
    }

    /* The third argument is the statement run if request creation fails. */
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);

    /* Nothing to transmit: return the request with its completion counter
       already at zero. */
    if (rank == MPI_PROC_NULL) {
        MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
        goto fn_exit;
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    /* Zero-length message: only the eager header goes on the wire. */
    if (data_sz == 0) {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

        MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
        sreq->dev.OnDataAvail = 0;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
        MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
        eager_pkt->match.parts.rank = comm->rank;
        eager_pkt->match.parts.tag = tag;
        eager_pkt->match.parts.context_id = comm->context_id + context_offset;
        eager_pkt->sender_req_id = sreq->handle;
        eager_pkt->data_sz = 0;

        MPIDI_VC_FAI_send_seqnum(vc, seqnum);
        MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
        MPIDI_Request_set_seqnum(sreq, seqnum);

        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
        mpi_errno = MPIDI_CH3_iSend(vc, sreq, eager_pkt, sizeof(*eager_pkt));
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            /* Drop the request entirely; the caller receives NULL. */
            MPIU_Object_set_ref(sreq, 0);
            MPIDI_CH3_Request_destroy(sreq);
            sreq = NULL;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* FIXME: flow control: limit number of outstanding eager messages
       containing data and need to be buffered by the receiver */
    /* NOTE(review): eager_threshold is an int while the left-hand side
       involves sizeof; the comparison is presumably done in an unsigned
       type - confirm a negative threshold cannot reach this point. */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold) {
        if (dt_contig) {
            mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, (char*)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset );
        }
        else {
            mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, buf, count, datatype, data_sz, rank, tag, comm, context_offset );
            /* If we're not complete, then add a reference to the datatype */
            if (sreq && sreq->dev.OnDataAvail) {
                sreq->dev.datatype_ptr = dt_ptr;
                MPID_Datatype_add_ref(dt_ptr);
            }
        }
    }
    else {
        /* Note that the sreq was created above */
        MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
        mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig, data_sz, dt_true_lb, rank, tag, comm, context_offset );
        /* FIXME: fill temporary IOV or pack temporary buffer after send to
           hide some latency. This requires synchronization because the CTS
           packet could arrive and be processed before the above iStartmsg
           completes (depending on the progress engine, threads, etc.). */

        /* sreq may be NULL after rndvSend_fn, hence the check; a datatype
           reference is recorded on the request only when dt_ptr is
           non-NULL. */
        if (sreq && dt_ptr != NULL) {
            sreq->dev.datatype_ptr = dt_ptr;
            MPID_Datatype_add_ref(dt_ptr);
        }
    }

  fn_exit:
    /* Every path, including the error paths, publishes sreq to the caller. */
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE, {
        if (sreq != NULL) {
            MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
        }
    } );
/*
 * MPIDI_CH3_PktHandler_RndvClrToSend - packet handler for a rendezvous
 * clear-to-send (CTS) packet.
 *
 * The CTS names the original send request (sender_req_id).  The handler
 * releases the RTS request still attached to it (if any), builds a
 * rendezvous-send header carrying the receiver's request id, and starts the
 * data transfer: an IOV send of header + user buffer for contiguous data, or
 * a segment-driven send through vc->sendNoncontig_fn otherwise.
 *
 * Parameters:
 *   vc     - virtual connection the packet arrived on; also used to reply
 *   pkt    - the received packet (read as a rndv_clr_to_send packet)
 *   buflen - [out] set to sizeof(MPIDI_CH3_Pkt_t)
 *   rreqp  - [out] set to NULL on success; not written on the error paths
 *
 * Returns MPI_SUCCESS, or an error code when segment allocation or the send
 * fails.
 */
int MPIDI_CH3_PktHandler_RndvClrToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts = &pkt->rndv_clr_to_send;
    MPIDI_CH3_Pkt_t hdr_storage;
    MPIDI_CH3_Pkt_rndv_send_t * data_hdr = &hdr_storage.rndv_send;
    MPID_Request * send_req;
    MPID_Request * pending_rts;
    MPID_Datatype * type_ptr;
    MPIDI_msg_sz_t nbytes;
    MPI_Aint true_lb;
    int is_contig;

    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received rndv CTS pkt");

    /* Map the handle echoed back by the receiver onto our send request. */
    MPID_Request_get_ptr(cts->sender_req_id, send_req);
    MPIU_DBG_PRINTF(("received cts, count=%d\n", send_req->dev.user_count));

    send_req->dev.OnDataAvail = 0;
    send_req->dev.OnFinal = 0;

    /* Release the RTS request if one exists.
       MPID_Request_fetch_and_clear_rts_sreq() needs to be atomic to
       prevent cancel send from cancelling the wrong (future) request.
       If MPID_Request_fetch_and_clear_rts_sreq() returns a NULL
       rts_sreq, then MPID_Cancel_send() is responsible for releasing
       the RTS request object. */
    MPIDI_Request_fetch_and_clear_rts_sreq(send_req, &pending_rts);
    if (pending_rts != NULL) {
        MPID_Request_release(pending_rts);
    }

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    /* Header that precedes the payload on the wire. */
    MPIDI_Pkt_init(data_hdr, MPIDI_CH3_PKT_RNDV_SEND);
    data_hdr->receiver_req_id = cts->receiver_req_id;

    MPIDI_Datatype_get_info(send_req->dev.user_count, send_req->dev.datatype,
                            is_contig, nbytes, type_ptr, true_lb);

    if (!is_contig) {
        /* Noncontiguous data: describe it with a segment and let the
           channel's noncontig send routine walk it. */
        send_req->dev.segment_ptr = MPID_Segment_alloc( );
        MPIU_ERR_CHKANDJUMP1((send_req->dev.segment_ptr == NULL), mpi_errno,
                             MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPID_Segment_alloc");

        MPID_Segment_init(send_req->dev.user_buf, send_req->dev.user_count,
                          send_req->dev.datatype, send_req->dev.segment_ptr, 0);
        send_req->dev.segment_first = 0;
        send_req->dev.segment_size = nbytes;

        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
        mpi_errno = vc->sendNoncontig_fn(vc, send_req, data_hdr, sizeof(*data_hdr));
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|senddata");
    }
    else {
        /* Contiguous data: two-element IOV, header followed by payload. */
        MPID_IOV iov[MPID_IOV_LIMIT];

        MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                         "sending contiguous rndv data, data_sz=" MPIDI_MSG_SZ_FMT,
                         nbytes));

        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)data_hdr;
        iov[0].MPID_IOV_LEN = sizeof(*data_hdr);

        iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)((char *)send_req->dev.user_buf + true_lb);
        iov[1].MPID_IOV_LEN = nbytes;

        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
        mpi_errno = MPIU_CALL(MPIDI_CH3,iSendv(vc, send_req, iov, 2));
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|senddata");
    }

    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
/* MPIDI_CH3_RndvSend - Send a request to perform a rendezvous send */ int MPIDI_CH3_RndvSend( MPID_Request **sreq_p, const void * buf, int count, MPI_Datatype datatype, int dt_contig, MPIDI_msg_sz_t data_sz, MPI_Aint dt_true_lb, int rank, int tag, MPID_Comm * comm, int context_offset ) { MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_rndv_req_to_send_t * const rts_pkt = &upkt.rndv_req_to_send; MPIDI_VC_t * vc; MPID_Request * rts_sreq; MPID_Request *sreq =*sreq_p; int mpi_errno = MPI_SUCCESS; MPIU_DBG_MSG_D(CH3_OTHER,VERBOSE, "sending rndv RTS, data_sz=" MPIDI_MSG_SZ_FMT, data_sz); sreq->dev.OnDataAvail = 0; sreq->partner_request = NULL; MPIDI_Pkt_init(rts_pkt, MPIDI_CH3_PKT_RNDV_REQ_TO_SEND); rts_pkt->match.parts.rank = comm->rank; rts_pkt->match.parts.tag = tag; rts_pkt->match.parts.context_id = comm->context_id + context_offset; rts_pkt->sender_req_id = sreq->handle; rts_pkt->data_sz = data_sz; MPIDI_Comm_get_vc_set_active(comm, rank, &vc); MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(rts_pkt, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); MPIU_DBG_MSGPKT(vc,tag,rts_pkt->match.parts.context_id,rank,data_sz,"Rndv"); MPIU_THREAD_CS_ENTER(CH3COMM,vc); mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, rts_pkt, sizeof(*rts_pkt), &rts_sreq)); MPIU_THREAD_CS_EXIT(CH3COMM,vc); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { MPIU_Object_set_ref(sreq, 0); MPIDI_CH3_Request_destroy(sreq); *sreq_p = NULL; MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt"); } /* --END ERROR HANDLING-- */ if (rts_sreq != NULL) { if (rts_sreq->status.MPI_ERROR != MPI_SUCCESS) { MPIU_Object_set_ref(sreq, 0); MPIDI_CH3_Request_destroy(sreq); *sreq_p = NULL; mpi_errno = rts_sreq->status.MPI_ERROR; MPID_Request_release(rts_sreq); MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt"); } MPID_Request_release(rts_sreq); } /* FIXME: fill temporary IOV or pack temporary buffer after send to hide some latency. 
This requires synchronization because the CTS packet could arrive and be processed before the above iStartmsg completes (depending on the progress engine, threads, etc.). */ fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
/*
 * MPIDI_CH3_PktHandler_RndvReqToSend - packet handler for a rendezvous
 * request-to-send (RTS) packet.
 *
 * Looks up (or enqueues as unexpected) a receive request matching the RTS
 * envelope.  If a posted receive was found, a clear-to-send (CTS) packet is
 * sent back immediately; otherwise the progress engine is signalled so that
 * a waiting probe can observe the newly queued unexpected request.
 *
 * Parameters:
 *   vc     - virtual connection the RTS arrived on; the CTS is sent on it
 *   pkt    - the received packet (read as a rndv_req_to_send packet)
 *   buflen - [out] set to sizeof(MPIDI_CH3_Pkt_t) once a request was obtained
 *   rreqp  - [out] set to NULL on success; not written on the error paths
 *
 * Returns MPI_SUCCESS, or an error code when no request could be obtained or
 * the CTS could not be started.
 */
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPID_Request * rreq;
    int found;
    int unexp_count = 0;   /* only meaningful when no request was obtained */
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    int mpi_errno = MPI_SUCCESS;

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
 "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=" MPIDI_MSG_SZ_FMT,
                     rts_pkt->sender_req_id, rts_pkt->match.parts.rank,
                     rts_pkt->match.parts.tag,
                     rts_pkt->match.parts.context_id, rts_pkt->data_sz));
    MPIU_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
                    rts_pkt->match.parts.rank,rts_pkt->data_sz,
                    "ReceivedRndv");

    MPIU_THREAD_CS_ENTER(MSGQUEUE,);
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    if (rreq != NULL) {
        set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);
    }
    else {
        /* Sample the queue length for the error message while the queue is
           still protected. */
        unexp_count = MPIDI_CH3U_Recvq_count_unexp();
    }
    /* Leave the critical section on BOTH outcomes before the error check
       below can jump to fn_fail; jumping from inside the section would
       return with it still entered. */
    MPIU_THREAD_CS_EXIT(MSGQUEUE,);

    MPIU_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", unexp_count);

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    if (found)
    {
        MPID_Request * cts_req;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"posted request found");

        /* FIXME: What if the receive user buffer is not big enough to
           hold the data about to be cleared for sending? */

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending rndv CTS packet");
        MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
        /* Echo the sender's handle and add ours so the data packet can be
           routed to this receive request. */
        cts_pkt->sender_req_id = rts_pkt->sender_req_id;
        cts_pkt->receiver_req_id = rreq->handle;
        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
        mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, cts_pkt,
                                                  sizeof(*cts_pkt), &cts_req));
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                "**ch3|ctspkt");
        }
        if (cts_req != NULL) {
            /* The request handed back for the CTS is not needed here. */
            MPID_Request_release(cts_req);
        }
    }
    else
    {
        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated");

        /*
         * A MPID_Probe() may be waiting for the request we just
         * inserted, so we need to tell the progress engine to exit.
         *
         * FIXME: This will cause MPID_Progress_wait() to return to the
         * MPI layer each time an unexpected RTS packet is
         * received.  MPID_Probe() should atomically increment a
         * counter and MPIDI_CH3_Progress_signal_completion()
         * should only be called if that counter is greater than zero.
         */
        MPIDI_CH3_Progress_signal_completion();
    }

    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
/**
 * \brief The callback for a new RZV RTS
 * \note  Because this is a short message, the data is already received
 * \param[in]  context      The context on which the message is being received.
 * \param[in]  sender       The origin endpoint
 * \param[in]  _msginfo     The extended header information
 *                          (must be a MPIDI_MsgEnvelope)
 * \param[in]  msginfo_size The size of the extended header information
 *                          (asserted to equal sizeof(MPIDI_MsgEnvelope))
 * \param[in]  is_zero_byte The rendezvous message is zero bytes in length.
 *
 * A spare request is created up front and offered to the receive queue:
 * the queue either returns a matching posted receive (\c found set) or
 * enqueues a request as unexpected.  The envelope is then copied into the
 * returned request.  When a posted receive matched, the transfer (or the
 * zero-byte completion) is started right away and the spare request is
 * discarded; otherwise the request stays queued until a receive is posted.
 */
void
MPIDI_RecvRzvCB_impl(pami_context_t    context,
                     pami_endpoint_t   sender,
                     const void      * _msginfo,
                     size_t            msginfo_size,
                     const unsigned    is_zero_byte)
{
  MPID_assert(_msginfo != NULL);
  MPID_assert(msginfo_size == sizeof(MPIDI_MsgEnvelope));
  const MPIDI_MsgEnvelope * envelope = (const MPIDI_MsgEnvelope *)_msginfo;
  const MPIDI_MsgInfo * msginfo = (const MPIDI_MsgInfo *)&envelope->msginfo;

  MPID_Request * rreq = NULL;
  int found;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  /* NOTE(review): not referenced directly below; presumably consumed by the
     token macros - confirm before removing. */
  int rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank       = msginfo->MPIrank;
  unsigned tag        = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  /* Created outside the queue critical section; same non-NULL check as the
     short-message callback performs after MPIDI_Request_create2(). */
  MPID_Request *newreq = MPIDI_Request_create2();
  MPID_assert(newreq != NULL);

  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, tag, context_id, &found);
#else
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, source, tag, context_id, msginfo->MPIseqno, &found);
#endif
  /* Trace the incoming envelope using the same fields the code below
     copies (envelope->memregion / envelope->length). */
  TRACE_ERR("RZV CB for req=%p remote-mr=0x%llx bytes=%zu (%sfound)\n",
            rreq,
            *(const unsigned long long*)&envelope->memregion,
            envelope->length,
            found?"":"not ");

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG    = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, envelope->length);
  MPIDI_Request_setPeerRank_comm(rreq, rank);
  MPIDI_Request_setPeerRank_pami(rreq, source);
  MPIDI_Request_cpyPeerRequestH (rreq, msginfo);
  MPIDI_Request_setSync         (rreq, msginfo->isSync);
  MPIDI_Request_setRzv          (rreq, 1);

  /* ----------------------------------------------------- */
  /* Save the rendezvous information for when the target   */
  /* node calls a receive function and the data is         */
  /* retrieved from the origin node.                       */
  /* ----------------------------------------------------- */
  if (is_zero_byte)
    {
      rreq->mpid.envelope.length = 0;
      rreq->mpid.envelope.data   = NULL;
    }
  else
    {
#ifdef USE_PAMI_RDMA
      memcpy(&rreq->mpid.envelope.memregion,
             &envelope->memregion,
             sizeof(pami_memregion_t));
#else
      /* The memory region is only copied when the sender marked it used. */
      rreq->mpid.envelope.memregion_used = envelope->memregion_used;
      if(envelope->memregion_used)
        {
          memcpy(&rreq->mpid.envelope.memregion,
                 &envelope->memregion,
                 sizeof(pami_memregion_t));
        }
      rreq->mpid.envelope.data   = envelope->data;
#endif
      rreq->mpid.envelope.length = envelope->length;
      TRACE_SET_R_VAL(source,(rreq->mpid.idx),req,rreq);
      TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,envelope->length);
      TRACE_SET_R_VAL(source,(rreq->mpid.idx),fl.f.sync,msginfo->isSync);
      TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.rzv);
      if (TOKEN_FLOW_CONTROL_ON)
        {
#if TOKEN_FLOW_CONTROL
          MPIDI_Must_return_tokens(context,source);
#else
          MPID_assert_always(0);
#endif
        }
    }

  /* ----------------------------------------- */
  /* figure out target buffer for request data */
  /* ----------------------------------------- */
  if (found)
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      /* --------------------------- */
      /* if synchronized, post ack.  */
      /* --------------------------- */
      if (unlikely(MPIDI_Request_isSync(rreq)))
        MPIDI_SyncAck_post(context, rreq, MPIDI_Request_getPeerRank_pami(rreq));

      /* The queue critical section ends before the transfer is started. */
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);

      if (is_zero_byte)
        MPIDI_RecvRzvDoneCB_zerobyte(context, rreq, PAMI_SUCCESS);
      else
        {
          MPIDI_RendezvousTransfer(context, rreq);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.sync_com_in_HH);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.matchedInHH);
          TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
        }
      /* A posted receive matched, so the spare request was not needed. */
      MPID_Request_discard(newreq);
    }

  /* ------------------------------------------------------------- */
  /* Request was not posted. */
  /* ------------------------------------------------------------- */
  else
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      /*
       * This is to test that the fields don't need to be
       * initialized.  Remove after this doesn't fail for a while.
       */
      MPID_assert(rreq->mpid.uebuf    == NULL);
      MPID_assert(rreq->mpid.uebuflen == 0);
      /* rreq->mpid.uebuf = NULL; */
      /* rreq->mpid.uebuflen = 0; */
#ifdef OUT_OF_ORDER_HANDLING
      if (MPIDI_In_cntr[source].n_OutOfOrderMsgs > 0) {
        MPIDI_Recvq_process_out_of_order_msgs(source, context);
      }
#endif
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }
  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}