/**
 * \brief Fill in the vector of contiguous regions for a window datatype
 *
 * A contiguous datatype gets a single entry stored in the embedded
 * dt->__map slot (offset dt->true_lb, length dt->size).  Otherwise a vector
 * of max_contig_blocks * count + 1 entries is allocated with MPIU_Malloc and
 * populated by MPID_Segment_pack_vector(), which also rewrites
 * dt->num_contig with the number of entries it produced.
 *
 * \param[in,out] dt The datatype descriptor whose map and num_contig are set
 */
void
MPIDI_Win_datatype_map(MPIDI_Datatype * dt)
{
  if (dt->contig)
    {
      /* Single region: reuse the slot embedded in the descriptor */
      dt->num_contig = 1;
      dt->map = &dt->__map;
      dt->map[0].DLOOP_VECTOR_BUF = (void*)(size_t)dt->true_lb;
      dt->map[0].DLOOP_VECTOR_LEN = dt->size;
      return;
    }

  unsigned capacity = dt->pointer->max_contig_blocks*dt->count + 1;
  dt->num_contig = capacity;
  dt->map = (DLOOP_VECTOR*)MPIU_Malloc(capacity * sizeof(DLOOP_VECTOR));
  MPID_assert(dt->map != NULL);

  /* Pack offsets relative to a NULL base, covering every byte of the type */
  DLOOP_Offset end_offset = dt->pointer->size*dt->count;
  MPID_Segment segment;
  MPID_Segment_init(NULL, dt->count, dt->type, &segment, 0);
  MPID_Segment_pack_vector(&segment, 0, &end_offset, dt->map, &dt->num_contig);
  MPID_assert((unsigned)dt->num_contig <= capacity);

#ifdef TRACE_ON
  TRACE_ERR("dt->pointer->size=%d  num_contig:  orig=%u  new=%d\n", dt->pointer->size, capacity, dt->num_contig);
  int idx;
  for(idx=0; idx<dt->num_contig; ++idx)
    TRACE_ERR("     %d:  BUF=%zu  LEN=%zu\n", idx, (size_t)dt->map[idx].DLOOP_VECTOR_BUF, (size_t)dt->map[idx].DLOOP_VECTOR_LEN);
#endif
}
/**
 * \brief Issue a fetch-and-op as a single PAMI read-modify-write
 *
 * Builds a pami_rmw_t from the window request and hands it to PAMI_Rmw().
 * MPIDI_Win_DoneCB is registered as the completion callback with the request
 * as its cookie.
 *
 * \param[in] context The PAMI context on which to issue the operation
 * \param[in] _req    The MPIDI_Win_request describing the operation
 * \return The result of PAMI_Rmw() (asserted to be PAMI_SUCCESS)
 */
static pami_result_t
MPIDI_Fetch_and_op_using_pami_rmw(pami_context_t   context,
                                  void           * _req)
{
  MPIDI_Win_request *req = (MPIDI_Win_request*)_req;
  pami_result_t rc;
  int  target_rank;

  MPID_assert(req != NULL);
  target_rank = req->target.rank;

  pami_rmw_t  params;
  params=zero_rmw_parms;
  params.dest=req->dest;
  params.cookie=(void *)req;
  params.done_fn=MPIDI_Win_DoneCB;
  params.type = req->pami_datatype;
  params.operation = req->pami_op;
  params.local=req->user_buffer;  /*result*/
  /* Target address: window base + request offset + first origin map entry */
  params.remote=req->win->mpid.info[target_rank].base_addr + req->offset + (size_t)req->origin.dt.map[0].DLOOP_VECTOR_BUF;
  params.value=req->buffer;        /* replaced value with origin */

  rc = PAMI_Rmw(context, &params);
  MPID_assert(rc == PAMI_SUCCESS);
  return rc;
}
/**
 * \brief Issue one PAMI_Get per contiguous region of the target datatype
 *
 * Starting at req->state.index, a get is issued for every remaining entry of
 * req->target.dt.map.  The loop stops early with PAMI_EAGAIN when the number
 * of started operations exceeds the completed ones by more than
 * MPIDI_Process.rma_pending; req->state records where to resume.
 *
 * \param[in]     context The PAMI context on which to issue the gets
 * \param[in,out] req     The window request; state.index and
 *                        state.local_offset are advanced per chunk
 * \return PAMI_SUCCESS once every chunk was issued, PAMI_EAGAIN to be retried
 */
static inline int
MPIDI_Get_use_pami_get(pami_context_t context, MPIDI_Win_request * req)
{
  pami_result_t rc;
  pami_get_simple_t params;

  params=zero_get_parms;

  params.rma.dest=req->dest;
  params.rma.hints.use_rdma = PAMI_HINT_DEFAULT;
#ifndef OUT_OF_ORDER_HANDLING
  params.rma.hints.no_long_header= 1;
#endif
  params.rma.bytes = 0;
  params.rma.cookie = req;
  params.rma.done_fn = MPIDI_Win_DoneCB;
  params.addr.local=req->buffer;
  params.addr.remote= req->win->mpid.info[req->target.rank].base_addr;

  struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
  TRACE_ERR("Start       index=%u/%d  l-addr=%p  r-base=%p  r-offset=%zu (sync->started=%u  sync->complete=%u)\n",
            req->state.index, req->target.dt.num_contig, req->buffer, req->win->mpid.info[req->target.rank].base_addr, req->offset, sync->started, sync->complete);

  while (req->state.index < req->target.dt.num_contig) {
    /* Throttle: too many operations in flight, ask to be called again */
    if (sync->started > sync->complete + MPIDI_Process.rma_pending)
      {
        TRACE_ERR("Bailing out;  index=%u/%d  sync->started=%u  sync->complete=%u\n",
                  req->state.index, req->target.dt.num_contig, sync->started, sync->complete);
        return PAMI_EAGAIN;
      }
    ++sync->started;

    params.rma.bytes = req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
    params.addr.local = req->buffer+req->state.local_offset;
    params.addr.remote = req->win->mpid.info[req->target.rank].base_addr+ req->offset + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;

    /* NOTE(review): the trace code below reads params.rdma.*, while the rest
     * of this function fills params.rma / params.addr - confirm it still
     * builds with TRACE_ON. */
#ifdef TRACE_ON
    unsigned* buf = (unsigned*)(req->buffer + params.rdma.local.offset);
#endif
    TRACE_ERR("  Sub     index=%u  bytes=%zu  l-offset=%zu  r-offset=%zu  buf=%p  *(int*)buf=0x%08x\n",
              req->state.index, params.rma.bytes, params.rdma.local.offset, params.rdma.remote.offset, buf, *buf);

    /*
     * Original note: sync->total will be updated with every RMA and the
     * complete count will not change until that RMA has completed; in the
     * meanwhile the rest of the RMAs will have memory leaks.
     *
     * The last-chunk test is evaluated BEFORE the get is issued and req is
     * not touched afterwards.  NOTE(review): presumably this is because
     * MPIDI_Win_DoneCB may free req once the final operation completes -
     * confirm.
     */
    const int is_last_chunk = (req->target.dt.num_contig - req->state.index == 1);

    rc = PAMI_Get(context, &params);
    MPID_assert(rc == PAMI_SUCCESS);
    if (is_last_chunk)
      return PAMI_SUCCESS;

    req->state.local_offset += params.rma.bytes;
    ++req->state.index;
  }
  return PAMI_SUCCESS;
}
/**
 * \brief Send one accumulate message per contiguous region of the target type
 *
 * Starting at req->state.index, a PAMI_Send is issued for every remaining
 * entry of req->target.dt.map, using the matching element of
 * req->accum_headers as the message header.  The loop stops early with
 * PAMI_EAGAIN when the number of started operations exceeds the completed
 * ones by more than MPIDI_Process.rma_pending.
 *
 * \param[in] context The PAMI context on which to send
 * \param[in] _req    The MPIDI_Win_request describing the accumulate
 * \return PAMI_SUCCESS once every chunk was sent, PAMI_EAGAIN to be retried
 */
static pami_result_t
MPIDI_Accumulate(pami_context_t   context,
                 void           * _req)
{
  MPIDI_Win_request *req = (MPIDI_Win_request*)_req;
  pami_result_t rc;
  pami_send_t params;

  params = zero_send_parms;

  params.send.header.iov_len = sizeof(MPIDI_Win_MsgInfo);
  params.send.dispatch = MPIDI_Protocols_WinAccum;
  params.send.dest = req->dest;
  params.events.cookie = req;
  params.events.remote_fn = MPIDI_Win_DoneCB;

  struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
  TRACE_ERR("Start       index=%u/%d  l-addr=%p  r-base=%p  r-offset=%zu (sync->started=%u  sync->complete=%u)\n",
            req->state.index, req->target.dt.num_contig, req->buffer, req->win->mpid.info[req->target.rank].base_addr, req->offset, sync->started, sync->complete);

  while (req->state.index < req->target.dt.num_contig) {
    /* Throttle: too many operations in flight, ask to be called again */
    if (sync->started > sync->complete + MPIDI_Process.rma_pending)
      {
        TRACE_ERR("Bailing out;  index=%u/%d  sync->started=%u  sync->complete=%u\n",
                  req->state.index, req->target.dt.num_contig, sync->started, sync->complete);
        return PAMI_EAGAIN;
      }
    ++sync->started;

    params.send.header.iov_base = &(((MPIDI_Win_MsgInfo *)req->accum_headers)[req->state.index]);
    params.send.data.iov_len = req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
    params.send.data.iov_base = req->buffer + req->state.local_offset;

#ifdef TRACE_ON
    void * buf = params.send.data.iov_base;
    unsigned* ibuf = (unsigned*)buf;
    double * dbuf = (double *)buf;
    TRACE_ERR("  Sub     index=%u  bytes=%zu  l-offset=%zu  r-addr=%p  l-buf=%p  *(int*)buf=0x%08x  *(double*)buf=%g\n",
              req->state.index, params.send.data.iov_len, req->state.local_offset, req->accum_headers[req->state.index].addr, buf, *ibuf, *dbuf);
#endif

    /*
     * Original note: sync->total will be updated with every RMA and the
     * complete count will not change until that RMA has completed; in the
     * meanwhile the rest of the RMAs will have memory leaks.
     *
     * The last-chunk test is evaluated BEFORE the send is issued and req is
     * not touched afterwards.  NOTE(review): presumably this is because
     * MPIDI_Win_DoneCB may free req once the final operation completes -
     * confirm.
     */
    const int is_last_chunk = (req->target.dt.num_contig - req->state.index == 1);

    rc = PAMI_Send(context, &params);
    MPID_assert(rc == PAMI_SUCCESS);
    if (is_last_chunk)
      return PAMI_SUCCESS;

    req->state.local_offset += params.send.data.iov_len;
    ++req->state.index;
  }
  return PAMI_SUCCESS;
}
void MPIDI_WinAtomicCB(pami_context_t context, void * cookie, const void * _hdr, size_t size, const void * sndbuf, size_t sndlen, pami_endpoint_t sender, pami_recv_t * recv) { MPIDI_AtomicHeader_t *ahdr = (MPIDI_AtomicHeader_t *) _hdr; MPID_assert (ahdr != NULL); MPID_assert (sizeof(MPIDI_AtomicHeader_t) == size); MPIDI_AtomicHeader_t ack_hdr = *ahdr; void *dest_addr = ahdr->remote_addr; int len; len = MPID_Datatype_get_basic_size (ahdr->datatype); if (ahdr->atomic_type == MPIDI_WIN_REQUEST_COMPARE_AND_SWAP) { //overwrite value with result in ack_hdr MPIU_Memcpy(ack_hdr.buf, dest_addr, len); if (MPIR_Compare_equal (&ahdr->test, dest_addr, ahdr->datatype)) MPIU_Memcpy(dest_addr, ahdr->buf, len); } else if (ahdr->atomic_type == MPIDI_WIN_REQUEST_FETCH_AND_OP) { //overwrite value with result MPIU_Memcpy(ack_hdr.buf, dest_addr, len); MPI_User_function *uop; int one = 1; uop = MPIR_OP_HDL_TO_FN(ahdr->op); if (ahdr->op == MPI_REPLACE) MPIU_Memcpy(dest_addr, ahdr->buf, len); else if (ahdr->op == MPI_NO_OP); else (*uop) ((void *)ahdr->buf, dest_addr, &one, &ahdr->datatype); } else MPID_abort(); pami_send_immediate_t params = { .dispatch = MPIDI_Protocols_WinAtomicAck, .dest = sender, .header = { .iov_base = &ack_hdr, .iov_len = sizeof(MPIDI_AtomicHeader_t), }, .data = { .iov_base = NULL, .iov_len = 0, }, .hints = {0},
/**
 * \brief Receive callback for a "zero byte" rendezvous RTS
 *
 * Checks that the message carries neither a receive descriptor nor payload
 * bytes, then forwards the header to MPIDI_RecvRzvCB_impl() with a final
 * argument of 1.
 *
 * \param[in]  context      The context on which the message is being received.
 * \param[in]  cookie       Unused
 * \param[in]  _msginfo     The extended header information
 * \param[in]  msginfo_size The size of the extended header information
 * \param[in]  sndbuf       Unused
 * \param[in]  sndlen       Asserted to be zero
 * \param[in]  sender       The origin endpoint
 * \param[out] recv         Asserted to be NULL
 */
void
MPIDI_RecvRzvCB_zerobyte(pami_context_t    context,
                         void            * cookie,
                         const void      * _msginfo,
                         size_t            msginfo_size,
                         const void      * sndbuf,
                         size_t            sndlen,
                         pami_endpoint_t   sender,
                         pami_recv_t     * recv)
{
  MPID_assert(recv == NULL);
  MPID_assert(sndlen == 0);

  MPIDI_RecvRzvCB_impl(context,
                       sender,
                       _msginfo,
                       msginfo_size,
                       1);
}
/**
 * \brief Deliver already-available data into a receive with a non-builtin type
 *
 * MSGQUEUE lock is not held.
 *
 * Oversized messages are handed to MPIDI_Callback_process_trunc() when
 * ASSERT_LEVEL > 0, and otherwise clamped to the datatype size.  Contiguous
 * receives are copied straight into the user buffer and completed; anything
 * else is completed through the UNPACK_UEBUF_AND_COMPLETE action with sndbuf
 * used as the unexpected buffer.
 *
 * \param[in]     context The PAMI context of the receive
 * \param[in]     sndbuf  The incoming data
 * \param[in]     sndlen  The number of incoming bytes
 * \param[in,out] rreq    The matched receive request
 */
void
MPIDI_Callback_process_userdefined_dt(pami_context_t      context,
                                      const void        * sndbuf,
                                      size_t              sndlen,
                                      MPID_Request      * rreq)
{
  unsigned contig, type_size;
  MPID_Datatype *type_ptr;
  MPI_Aint true_lb;
  MPIDI_Datatype_get_info(rreq->mpid.userbufcount,
                          rreq->mpid.datatype,
                          contig,
                          type_size,
                          type_ptr,
                          true_lb);

  /* Truncation: more bytes arrived than the receive can hold */
  if (unlikely(sndlen > type_size))
    {
#if ASSERT_LEVEL > 0
      MPIDI_Callback_process_trunc(context, rreq, NULL, sndbuf);
      return;
#else
      sndlen = type_size;
#endif
    }

  if (likely (contig))
    {
      /*
       * These asserts test that the fields don't need to be
       * initialized.  Remove after this doesn't fail for a while.
       */
      MPID_assert(rreq->mpid.uebuf    == NULL);
      MPID_assert(rreq->mpid.uebuflen == 0);

      memcpy(rreq->mpid.userbuf + true_lb, sndbuf, sndlen);
      MPIDI_Request_complete(rreq);
      return;
    }

  /* Non-contiguous: let the completion action unpack straight from sndbuf */
  MPIDI_Request_setCA(rreq, MPIDI_CA_UNPACK_UEBUF_AND_COMPLETE);
  rreq->mpid.uebuflen = sndlen;
  rreq->mpid.uebuf    = (void*)sndbuf;
  MPIDI_RecvDoneCB(context, rreq, PAMI_SUCCESS);
  MPID_Request_release(rreq);
}
/**
 * \brief Handle a message that is larger than the posted receive
 *
 * MSGQUEUE lock is not held.
 *
 * Marks the request with MPI_ERR_TRUNCATE and arranges for completion through
 * the UNPACK_UEBUF_AND_COMPLETE action, limited to the status count.  When a
 * receive descriptor is supplied, a temporary buffer is allocated and set as
 * the receive address; otherwise sndbuf is used directly and the receive is
 * completed immediately.
 *
 * \param[in]     context The PAMI context of the receive
 * \param[in,out] rreq    The matched receive request
 * \param[out]    recv    Receive descriptor to fill in, or NULL
 * \param[in]     sndbuf  The incoming data (used only when recv is NULL)
 */
void
MPIDI_Callback_process_trunc(pami_context_t  context,
                             MPID_Request   *rreq,
                             pami_recv_t    *recv,
                             const void     *sndbuf)
{
  rreq->status.MPI_ERROR = MPI_ERR_TRUNCATE;

  /* Common to both paths: unpack from the ue buffer when the data is in */
  MPIDI_Request_setCA(rreq, MPIDI_CA_UNPACK_UEBUF_AND_COMPLETE);
  rreq->mpid.uebuflen = MPIR_STATUS_GET_COUNT(rreq->status);

  if (recv == NULL)
    {
      /* The data is already available, so we can just unpack it now. */
      rreq->mpid.uebuf = (void*)sndbuf;
      MPIDI_RecvDoneCB(context, rreq, PAMI_SUCCESS);
      MPID_Request_release(rreq);
      return;
    }

  /* Data still to arrive: receive it into a temporary buffer */
  rreq->mpid.uebuf = MPIU_Malloc(MPIR_STATUS_GET_COUNT(rreq->status));
  MPID_assert(rreq->mpid.uebuf != NULL);
  rreq->mpid.uebuf_malloc = mpiuMalloc;

  recv->addr = rreq->mpid.uebuf;
}
void MPIDI_WinCtrlSend(pami_context_t context, MPIDI_Win_control_t *control, int rank, MPID_Win *win) { pami_task_t taskid; MPIDI_WinLock_info *winLock; control->win = win->mpid.info[rank].win; control->rank = win->comm_ptr->rank; taskid=MPID_VCR_GET_LPID(win->comm_ptr->vcr,rank); pami_endpoint_t dest; pami_result_t rc; taskid=MPID_VCR_GET_LPID(win->comm_ptr->vcr,rank); rc = PAMI_Endpoint_create(MPIDI_Client,taskid, 0, &dest); MPID_assert(rc == PAMI_SUCCESS); if ((control->type == MPIDI_WIN_MSGTYPE_UNLOCK) || (control->type == MPIDI_WIN_MSGTYPE_UNLOCKALL)) { pami_send_t params = { .send = { .dispatch = MPIDI_Protocols_WinCtrl, .dest = dest, .header = { .iov_base = control, .iov_len = sizeof(MPIDI_Win_control_t), }, }, .events = { .cookie = win, .local_fn = NULL, .remote_fn= MPIDI_WinUnlockDoneCB, }, };
/**
 * \brief Queue an incoming window lock request and try to advance the queue
 *
 * Allocates a zeroed MPIDI_Win_lock, fills it in from the control message,
 * appends it to the window's local "requested" queue and calls
 * MPIDI_WinLockAdvance().
 *
 * \param[in] context The PAMI context on which the request arrived
 * \param[in] info    The lock control message
 * \param[in] peer    Unused in this function
 */
void
MPIDI_WinLockReq_proc(pami_context_t              context,
                      const MPIDI_Win_control_t * info,
                      unsigned                    peer)
{
  MPID_Win * win = info->win;
  struct MPIDI_Win_lock* lock = MPL_calloc0(1, struct MPIDI_Win_lock);
  /* The entry is dereferenced right away; fail loudly on allocation failure */
  MPID_assert(lock != NULL);

  if (info->type == MPIDI_WIN_MSGTYPE_LOCKREQ)
    lock->mtype = MPIDI_REQUEST_LOCK;
  else if (info->type == MPIDI_WIN_MSGTYPE_LOCKALLREQ) {
    lock->mtype = MPIDI_REQUEST_LOCKALL;
    lock->flagAddr = (void *) info->flagAddr;
  }
  /* Any other message type leaves mtype at its zero-initialized value */
  lock->rank = info->rank;
  lock->type = info->data.lock.type;

  struct MPIDI_Win_queue* q = &win->mpid.sync.lock.local.requested;
  /* head and tail must be both NULL (empty) or both set */
  MPID_assert( (q->head != NULL) ^ (q->tail == NULL) );
  if (q->tail == NULL)
    q->head = lock;
  else
    q->tail->next = lock;
  q->tail = lock;

  MPIDI_WinLockAdvance(context, win);
}
void MPIDI_Request_allocate_pool() { int i; MPID_Request *prev, *cur; /* batch allocate a linked list of requests */ MPIU_THREAD_CS_ENTER(HANDLEALLOC,); prev = MPIU_Handle_obj_alloc_unsafe(&MPID_Request_mem); MPID_assert(prev != NULL); prev->mpid.next = NULL; for (i = 1; i < MPID_REQUEST_TLS_MAX; ++i) { cur = MPIU_Handle_obj_alloc_unsafe(&MPID_Request_mem); MPID_assert(cur != NULL); cur->mpid.next = prev; prev = cur; } MPIU_THREAD_CS_EXIT(HANDLEALLOC,); MPIDI_Process.request_handles[MPIDI_THREAD_ID()].head = cur; MPIDI_Process.request_handles[MPIDI_THREAD_ID()].count += MPID_REQUEST_TLS_MAX; }
/**
 * \brief Apply info hints to a window, then synchronize the communicator
 *
 * Calls MPIDI_Win_set_info() and follows it with a barrier on the window's
 * communicator.
 *
 * \param[in,out] win  The window to update
 * \param[in]     info The info object carrying the hints
 * \return The result of the barrier
 */
int
MPID_Win_set_info(MPID_Win   *win,
                  MPID_Info  *info)
{
  int mpi_errno = MPI_SUCCESS;
  /* Separate, zeroed error flag for the barrier: previously &mpi_errno was
   * passed, so the flag aliased the variable receiving the return value and
   * started out holding whatever MPIDI_Win_set_info() returned. */
  int errflag = 0;

  mpi_errno = MPIDI_Win_set_info(win, info);
  MPID_assert(mpi_errno == MPI_SUCCESS);
  mpi_errno = MPIR_Barrier_impl(win->comm_ptr, &errflag);
  return mpi_errno;
}
/**
 * \brief Receive callback for a short message on the synchronous dispatch
 *
 * Checks that no receive descriptor was supplied and that the header has the
 * size of MPIDI_MsgInfo, then forwards to MPIDI_RecvShortCB() with a final
 * argument of 1.
 *
 * \param[in]  context      The context on which the message is being received.
 * \param[in]  cookie       Unused
 * \param[in]  _msginfo     The header information
 * \param[in]  msginfo_size The size of the header information
 * \param[in]  sndbuf       The payload
 * \param[in]  sndlen       The payload length in bytes
 * \param[in]  sender       The origin endpoint
 * \param[out] recv         Asserted to be NULL
 */
void
MPIDI_RecvShortSyncCB(pami_context_t    context,
                      void            * cookie,
                      const void      * _msginfo,
                      size_t            msginfo_size,
                      const void      * sndbuf,
                      size_t            sndlen,
                      pami_endpoint_t   sender,
                      pami_recv_t     * recv)
{
  MPID_assert(recv == NULL);
  MPID_assert(msginfo_size == sizeof(MPIDI_MsgInfo));

  MPIDI_RecvShortCB(context,
                    _msginfo,
                    sndbuf,
                    sndlen,
                    sender,
                    1);
}
/**
 * \brief Try to cancel a send request
 *
 * Nothing is done when a cancel is already pending.  A send-to-self is
 * cancelled locally by looking up the partner receive with
 * MPIDI_Recvq_FDUR(); any other send with a communicator has its completion
 * count incremented and a cancel request posted.
 *
 * \param[in,out] sreq The send request to cancel (must not be NULL)
 * \return MPI_SUCCESS in every case
 */
static inline int
MPID_Cancel_send_rsm(MPID_Request * sreq)
{
  int cancel_pending;

  MPID_assert(sreq != NULL);

  /* Check if we already have a cancel request pending */
  MPIDI_DCMF_Request_cancel_pending(sreq, &cancel_pending);
  if (cancel_pending)
    return MPI_SUCCESS;

  /* Remote send: ask the destination to cancel, unless there is no comm */
  if (!MPID_Request_isSelf(sreq))
    {
      if (sreq->comm)
        {
          MPID_Request_increment_cc(sreq);
          MPIDI_DCMF_postCancelReq(sreq);
        }
      return MPI_SUCCESS;
    }

  /* Send to self: look the matching receive up in the queue */
  {
    int source     = MPID_Request_getMatchRank(sreq);
    int tag        = MPID_Request_getMatchTag (sreq);
    int context_id = MPID_Request_getMatchCtxt(sreq);
    MPID_Request * partner = MPIDI_Recvq_FDUR(sreq, source, tag, context_id);

    if (partner)
      {
        MPID_assert(partner->partner_request == sreq);
        MPID_Request_release(partner);
        sreq->status.cancelled = TRUE;
        sreq->cc = 0;
      }
  }
  return MPI_SUCCESS;
}
static void MPIDI_Win_GetAccumSendAck(pami_context_t context, void * _info, pami_result_t result) { MPIDI_Win_GetAccMsgInfo *msginfo = (MPIDI_Win_GetAccMsgInfo *) _info; pami_result_t rc = PAMI_SUCCESS; //Copy from msginfo->addr to a contiguous buffer char *buffer = NULL; buffer = MPIU_Malloc(msginfo->size); MPID_assert(buffer != NULL); if (msginfo->num_contig == 1) memcpy(buffer, msginfo->addr, msginfo->size); else { int mpi_errno = 0; mpi_errno = MPIR_Localcopy(msginfo->addr, msginfo->count, msginfo->type, buffer, msginfo->size, MPI_CHAR); MPID_assert(mpi_errno == MPI_SUCCESS); } //Schedule sends to source to result buffer and trigger completion //callback there pami_send_t params = { .send = { .header = { .iov_base = msginfo, .iov_len = sizeof(MPIDI_Win_GetAccMsgInfo), }, .dispatch = MPIDI_Protocols_WinGetAccumAck, .dest = msginfo->src_endpoint, }, .events = {
/**
 * \brief Register the local window memory and exchange window info
 *
 * For a non-empty, non-shared window a PAMI memory region is created over
 * the local base address (unconditionally with USE_PAMI_RDMA, otherwise only
 * when MPIDI_Process.mp_s_use_pami_get is not set).  The per-rank
 * MPIDI_Win_info array is then exchanged with an in-place allgather.
 *
 * \param[in]     size    Size of the local window in bytes
 * \param[in,out] win_ptr The window whose info array is filled in
 * \return The result of the allgather, or an error from memory registration
 */
int
MPIDI_Win_allgather( MPI_Aint size, MPID_Win **win_ptr )
{
  int mpi_errno = MPI_SUCCESS;
  MPID_Win *win = *win_ptr;
  MPID_Comm *comm_ptr = win->comm_ptr;
  int rank = comm_ptr->rank;
  MPIDI_Win_info *winfo = &win->mpid.info[rank];
  size_t length_out = 0;
  pami_result_t rc;
  static char FCNAME[] = "MPIDI_Win_allgather";

  if (size != 0 && win->create_flavor != MPI_WIN_FLAVOR_SHARED)
    {
#ifndef USE_PAMI_RDMA
      if (!MPIDI_Process.mp_s_use_pami_get)
        {
#endif
          /* Setup the PAMI sections of the window */
          rc = PAMI_Memregion_create(MPIDI_Context[0],
                                     winfo->base_addr,
                                     win->size,
                                     &length_out,
                                     &winfo->memregion);
#ifdef USE_PAMI_RDMA
          MPIU_ERR_CHKANDJUMP((rc != PAMI_SUCCESS), mpi_errno, MPI_ERR_OTHER, "**nomem");
          MPIU_ERR_CHKANDJUMP((win->size < length_out), mpi_errno, MPI_ERR_OTHER, "**nomem");
#else
          /* Registration is optional here: only record it when it worked */
          if (rc == PAMI_SUCCESS)
            {
              winfo->memregion_used = 1;
              MPID_assert(win->size == length_out);
            }
        }
#endif
    }

  mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE,
                                  0,
                                  MPI_DATATYPE_NULL,
                                  win->mpid.info,
                                  sizeof(struct MPIDI_Win_info),
                                  MPI_BYTE,
                                  comm_ptr,
                                  &mpi_errno);

fn_fail:
  return mpi_errno;
}
/**
 * \brief Receive callback for an incoming accumulate message
 *
 * Translates the MPI datatype and operation named in the header into their
 * PAMI equivalents and fills in the receive descriptor so the payload is
 * combined into msginfo->addr with that operation.
 *
 * \param[in]  context      The context on which the message is being received.
 * \param[in]  cookie       Unused
 * \param[in]  _msginfo     The MPIDI_Win_MsgInfo header
 * \param[in]  msginfo_size The size of the header
 * \param[in]  sndbuf       Asserted to be NULL
 * \param[in]  sndlen       The payload length (only used for tracing)
 * \param[in]  sender       Unused
 * \param[out] recv         The receive descriptor to fill in
 */
void
MPIDI_WinAccumCB(pami_context_t    context,
                 void            * cookie,
                 const void      * _msginfo,
                 size_t            msginfo_size,
                 const void      * sndbuf,
                 size_t            sndlen,
                 pami_endpoint_t   sender,
                 pami_recv_t     * recv)
{
  const MPIDI_Win_MsgInfo * msginfo;
  pami_type_t         pami_type;
  pami_data_function  pami_op;
  int null = 0;

  MPID_assert(recv   != NULL);
  MPID_assert(sndbuf == NULL);
  MPID_assert(msginfo_size == sizeof(MPIDI_Win_MsgInfo));
  MPID_assert(_msginfo != NULL);
  msginfo = (const MPIDI_Win_MsgInfo*)_msginfo;

  MPIDI_Datatype_to_pami(msginfo->type, &pami_type, msginfo->op, &pami_op, &null);

#ifdef TRACE_ON
  void    *  buf = msginfo->addr;
  unsigned* ibuf = (unsigned*)buf;
  double  * dbuf = (double  *)buf;
  TRACE_ERR("New accum msg:  len=%zu  type=%x  op=%x  l-buf=%p  *(int*)buf=0x%08x  *(double*)buf=%g\n", sndlen, msginfo->type, msginfo->op, buf, *ibuf, *dbuf);
  TRACE_ERR("                PAMI:    type=%p  op=%p\n", pami_type, pami_op);
#endif

  /* Start from the zeroed template, then describe the accumulate target */
  *recv = zero_recv_parms;
  recv->cookie      = NULL;
  recv->local_fn    = NULL;
  recv->addr        = msginfo->addr;
  recv->type        = pami_type;
  recv->offset      = 0;
  recv->data_fn     = pami_op;
  recv->data_cookie = NULL;
}
/**
 * \brief MPI-PAMI glue for MPI_Win_allocate function
 *
 * Create a window object. Allocates a MPID_Win object and initializes it,
 * then allocates the collective info array, initializes our entry, and
 * performs an Allgather to distribute/collect the rest of the array entries.
 * On each process, it allocates memory of at least size bytes, returns a
 * pointer to it, and returns a window object that can be used by all processes
 * in comm to perform RMA operations. The returned memory consists of size
 * bytes local to each process, starting at address base_ptr and is associated
 * with the window as if the user called 'MPI_Win_create' on existing memory.
 * The size argument may be different at each process and size = 0 is valid;
 * however, a library might allocate and expose more memory in order to create
 * a fast, globally symmetric allocation.
 *
 * Input Parameters:
 * \param[in]  size      size of window in bytes (nonnegative integer)
 * \param[in]  disp_unit local unit size for displacements, in bytes (positive integer)
 * \param[in]  info      info argument (handle)
 * \param[in]  comm_ptr  Communicator (handle)
 * \param[out] base_ptr  base address of the window in local memory
 *                       (passed as void* but written through as void**)
 * \param[out] win_ptr   window object returned by the call (handle)
 * \return MPI_SUCCESS, MPI_ERR_ARG, MPI_ERR_COMM, MPI_ERR_INFO, MPI_ERR_OTHER,
 *         MPI_ERR_SIZE
 */
int
MPID_Win_allocate(MPI_Aint     size,
                  int          disp_unit,
                  MPID_Info  * info,
                  MPID_Comm  * comm_ptr,
                  void *base_ptr,
                  MPID_Win  ** win_ptr)
{
  int mpi_errno  = MPI_SUCCESS;
  int rc = MPI_SUCCESS;
  mpir_errflag_t errflag = MPIR_ERR_NONE;
  void *baseP = NULL;
  static char FCNAME[] = "MPID_Win_allocate";
  MPIDI_Win_info  *winfo;
  MPID_Win   *win;
  int         rank;

  rc=MPIDI_Win_init(size,disp_unit,win_ptr, info, comm_ptr, MPI_WIN_FLAVOR_ALLOCATE, MPI_WIN_UNIFIED);
  /* Do not dereference *win_ptr when initialization reported an error */
  if (rc != MPI_SUCCESS)
    return rc;
  win = *win_ptr;

  if (size > 0) {
      baseP = MPIU_Malloc(size);
#ifndef MPIDI_NO_ASSERT
      MPID_assert(baseP != NULL);
#else
      MPIU_ERR_CHKANDJUMP((baseP == NULL), mpi_errno, MPI_ERR_BUFFER, "**bufnull");
#endif
  } else if (size == 0) {
      baseP = NULL;
  } else {
      /* The macro fires when its condition is TRUE, so test for the error
       * case itself; the previous "size >= 0" could never fire here and let
       * a negative size fall through with baseP uninitialized. */
      MPIU_ERR_CHKANDSTMT(size < 0, mpi_errno, MPI_ERR_SIZE,
                          return mpi_errno, "**rmasize");
  }

  /* Publish our own entry of the collective info array */
  win->base = baseP;
  rank = comm_ptr->rank;
  winfo = &win->mpid.info[rank];
  winfo->base_addr = baseP;
  winfo->win = win;
  winfo->disp_unit = disp_unit;

  rc= MPIDI_Win_allgather(size,win_ptr);
  if (rc != MPI_SUCCESS)
      return rc;
  *(void**) base_ptr = (void *) win->base;
  mpi_errno = MPIR_Barrier_impl(comm_ptr, &errflag);

fn_fail:
  return mpi_errno;
}
/**
 * Insert a request into the per-source OutOfOrderList, keeping the circular
 * list sorted by ascending match sequence number.
 *
 * \param[in]     src The source task whose out-of-order list is updated
 * \param[in,out] req The request to insert
 */
void MPIDI_Recvq_enqueue_ool(pami_task_t src, MPID_Request *req)
{
  MPIDI_In_cntr_t *in_cntr = &MPIDI_In_cntr[src];

  if (in_cntr->n_OutOfOrderMsgs == 0)
    {
      /* empty list: req becomes a one-element ring */
      in_cntr->OutOfOrderList=req;
      req->mpid.prevR=req;
      req->mpid.nextR=req;
    }
  else
    {
      void *head = in_cntr->OutOfOrderList;
      MPID_Request *q = in_cntr->OutOfOrderList;
      int found = 0;

      MPID_assert(q->mpid.nextR != NULL);

      /* Walk every element but the last, looking for the first one whose
       * sequence number is greater than req's */
      for (; q->mpid.nextR != head; q = q->mpid.nextR)
        {
          if (((int)(MPIDI_Request_getMatchSeq(q) - MPIDI_Request_getMatchSeq(req))) > 0)
            {
              found = 1;
              break;
            }
        }

      /* When nothing was found q is the last element, which the loop did
       * not compare - test it now */
      if (found ||
          ((int)(MPIDI_Request_getMatchSeq(q) - MPIDI_Request_getMatchSeq(req))) > 0)
        {
          MPIDI_Recvq_insert_ool(q,req);
          if (q == head)
            {
              /* 1st element in the list */
              in_cntr->OutOfOrderList=req;
            }
        }
      else
        {
          MPIDI_Recvq_insert_ool((MPID_Request *)q->mpid.nextR,req);
        }
    }

  in_cntr->n_OutOfOrderMsgs++;
#if (MPIDI_STATISTICS)
  MPID_NSTAT(mpid_statp->unorderedMsgs);
#endif
} /* MPIDI_Recvq_enqueue_ool */
/**
 * \brief Completion callback for a one-sided operation
 *
 * Counts the operation as complete on the window.  For a GET that used a
 * temporary buffer, the data is copied into the origin buffer once every
 * contiguous chunk has arrived.  The request itself is freed when the
 * window's total and complete counters match.
 *
 * \param[in] context Unused
 * \param[in] cookie  The MPIDI_Win_request that finished
 * \param[in] result  Unused
 */
void
MPIDI_Win_DoneCB(pami_context_t  context,
                 void          * cookie,
                 pami_result_t   result)
{
  MPIDI_Win_request *req = (MPIDI_Win_request*)cookie;
  struct MPIDI_Win_sync *sync = &req->win->mpid.sync;

  ++sync->complete;

  if ((req->buffer_free) && (req->type == MPIDI_WIN_REQUEST_GET))
    {
      /* Unpack only after the last chunk of the get has landed */
      if (++req->origin.completed == req->target.dt.num_contig)
        {
          int mpi_errno = MPIR_Localcopy(req->buffer,
                                         req->origin.dt.size,
                                         MPI_CHAR,
                                         req->origin.addr,
                                         req->origin.count,
                                         req->origin.datatype);
          MPID_assert(mpi_errno == MPI_SUCCESS);
          MPID_Datatype_release(req->origin.dt.pointer);
          MPIU_Free(req->buffer);
          /* Cleared so the buffer is not freed a second time below */
          req->buffer_free = 0;
        }
    }

  if (sync->total == sync->complete)
    {
      if (req->buffer_free)
        MPIU_Free(req->buffer);
      if (req->accum_headers)
        MPIU_Free(req->accum_headers);
      MPIU_Free(req);
    }
  MPIDI_Progress_signal();
}
/**
 * \brief Finish a receive whose message was found in the unexpected queue
 *
 * We must acknowledge synchronous send requests: the recvnew callback
 * acknowledges posted messages, while the Recv functions (this path)
 * acknowledge the unexpected ones.
 *
 * \param[in,out] rreq     The request taken from the unexpected queue
 * \param[out]    buf      The user receive buffer
 * \param[in]     count    The number of elements in the user buffer
 * \param[in]     datatype The datatype of those elements
 */
void
MPIDI_RecvMsg_Unexp(MPID_Request  * rreq,
                    void          * buf,
                    int             count,
                    MPI_Datatype    datatype)
{
#ifdef MPIDI_TRACE
  MPIDI_In_cntr[(rreq->mpid.partner_id)].R[(rreq->mpid.idx)].matchedInUQ=1;
#endif

  if (MPIDI_Request_isRzv(rreq))
    {
      /*
       * Received an expected flow-control rendezvous RTS.
       * This is very similar to the found/incomplete case.
       */
      const unsigned is_sync = MPIDI_Request_isSync(rreq);
      const unsigned is_zero = (rreq->mpid.envelope.length==0);

      if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN)
        {
          MPID_Datatype_get_ptr(datatype, rreq->mpid.datatype_ptr);
          MPID_Datatype_add_ref(rreq->mpid.datatype_ptr);
        }

      if (likely((is_sync+is_zero) == 0))
        MPIDI_Context_post(MPIDI_Context_local(rreq), &rreq->mpid.post_request, MPIDI_RendezvousTransfer, rreq);
      else if (is_sync != 0)
        MPIDI_Context_post(MPIDI_Context_local(rreq), &rreq->mpid.post_request, MPIDI_RendezvousTransfer_SyncAck, rreq);
      else
        MPIDI_Context_post(MPIDI_Context_local(rreq), &rreq->mpid.post_request, MPIDI_RendezvousTransfer_zerobyte, rreq);
      return;
    }

  /* Sample the completion state first: the sync-ack below "uncompletes" the
   * request and must not influence which branch is taken. */
  const int was_complete = MPID_cc_is_complete(&rreq->cc);

  if (unlikely(MPIDI_Request_isSync(rreq)))
    {
      /* Post this to the context for asynchronous progress.  We cannot do
       * the send-immediate inline here because we may not have the
       * context locked (it is being asynchronously advanced).
       * Must "uncomplete" the message (increment the ref and completion
       * counts) so we hold onto this request object until this send has
       * completed.  When MPIDI_SyncAck_handoff finishes sending the ack, it
       * will complete the request, decrementing the ref and completion
       * counts.
       */
      MPIDI_Request_uncomplete(rreq);
      MPIDI_Send_post(MPIDI_SyncAck_handoff, rreq);
    }

  if (!was_complete)
    {
      /* request is incomplete: unpack once the data has arrived */
      if(rreq->status.cancelled == FALSE)
        {
          MPIDI_Request_setCA(rreq, MPIDI_CA_UNPACK_UEBUF_AND_COMPLETE);
        }
      if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN)
        {
          MPID_Datatype_get_ptr(datatype, rreq->mpid.datatype_ptr);
          MPID_Datatype_add_ref(rreq->mpid.datatype_ptr);
        }
      return;
    }

  /* request is complete */
  if (rreq->mpid.uebuf == NULL)
    {
      MPID_assert(rreq->mpid.uebuflen == 0);
      rreq->status.count = 0;
      return;
    }

  if (likely(rreq->status.cancelled == FALSE))
    {
      MPIDI_msg_sz_t copied=0;
      MPIDI_Buffer_copy(rreq->mpid.uebuf,
                        rreq->mpid.uebuflen,
                        MPI_CHAR,
                        &rreq->status.MPI_ERROR,
                        buf,
                        count,
                        datatype,
                        &copied,
                        &rreq->status.MPI_ERROR);
      rreq->status.count = copied;
    }
}
/**
 * \brief MPID buffer copy
 *
 * Copies data described by (sbuf, scount, sdt) into the buffer described by
 * (rbuf, rcount, rdt), handling non-contiguous datatypes on either side.
 * When the source holds more bytes than the destination can take, the copy
 * is limited to the destination size and *rmpi_errno is set to a truncation
 * error.
 *
 * \param[in]  sbuf       The address of the input buffer
 * \param[in]  scount     The number of elements in that buffer
 * \param[in]  sdt        The datatype of those elements
 * \param[out] smpi_errno Returns errors
 * \param[in]  rbuf       The address of the output buffer
 * \param[in]  rcount     The number of elements in that buffer
 * \param[in]  rdt        The datatype of those elements
 * \param[out] rsz        The size of the output data
 * \param[out] rmpi_errno Returns errors
 */
void MPIDI_Buffer_copy(
    const void * const sbuf, MPI_Aint scount, MPI_Datatype sdt, int * smpi_errno,
    void * const rbuf, MPI_Aint rcount, MPI_Datatype rdt, MPIDI_msg_sz_t * rsz,
    int * rmpi_errno)
{
    int sdt_contig;
    int rdt_contig;
    MPI_Aint sdt_true_lb, rdt_true_lb;
    MPIDI_msg_sz_t sdata_sz;
    MPIDI_msg_sz_t rdata_sz;
    MPID_Datatype * sdt_ptr;
    MPID_Datatype * rdt_ptr;

    MPI_Aint  sdt_extent;
    MPI_Aint  rdt_extent;

    *smpi_errno = MPI_SUCCESS;
    *rmpi_errno = MPI_SUCCESS;

    /* printf("bufcopy: src count=%d dt=%d\n", scount, sdt); */
    /* printf("bufcopy: dst count=%d dt=%d\n", rcount, rdt); */

    MPIDI_Datatype_get_info(scount, sdt, sdt_contig, sdata_sz, sdt_ptr, sdt_true_lb);
    MPIDI_Datatype_get_info(rcount, rdt, rdt_contig, rdata_sz, rdt_ptr, rdt_true_lb);

    /* --BEGIN ERROR HANDLING-- */
    /* Truncation: report it, then copy only what fits */
    if (sdata_sz > rdata_sz)
    {
        *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz );
        sdata_sz = rdata_sz;
    }
    /* --END ERROR HANDLING-- */

    if (sdata_sz == 0)
    {
        *rsz = 0;
        goto fn_exit;
    }

    if (sdt_contig && rdt_contig)
    {
        /* Both sides contiguous: one flat copy */
#if CUDA_AWARE_SUPPORT
      if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rbuf))
      {
        /* NOTE(review): cudaerr is never inspected */
        cudaError_t cudaerr = CudaMemcpy(rbuf + rdt_true_lb, sbuf + sdt_true_lb, sdata_sz, cudaMemcpyHostToDevice);
      }
      else
#endif
        memcpy((char*)rbuf + rdt_true_lb, (const char *)sbuf + sdt_true_lb, sdata_sz);
        /* Reached on both the device and the host path */
        *rsz = sdata_sz;
    }
    else if (sdt_contig)
    {
        /* Contiguous source, non-contiguous destination: unpack */
#if CUDA_AWARE_SUPPORT
      // This will need to be done in two steps:
      // 1 - Allocate a temp buffer which is the same size as user buffer and unpack in it.
      // 2 - Copy unpacked data into user buffer from temp buffer.
      if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rbuf))
      {
        MPID_Datatype_get_extent_macro(rdt, rdt_extent);
        /* NOTE(review): the allocation result is not checked before memset */
        char *buf =  MPL_malloc(rdt_extent * rcount);
        memset(buf, 0, rdt_extent * rcount);
        MPID_Segment seg;
        DLOOP_Offset last;

        MPID_Segment_init(buf, rcount, rdt, &seg, 0);
        last = sdata_sz;
        MPID_Segment_unpack(&seg, 0, &last, (char*)sbuf + sdt_true_lb);
        /* --BEGIN ERROR HANDLING-- */
        if (last != sdata_sz)
        {
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

        *rsz = last;

        /* NOTE(review): cudaerr is never inspected */
        cudaError_t cudaerr = CudaMemcpy(rbuf + rdt_true_lb, buf, rdt_extent * rcount, cudaMemcpyHostToDevice);

        MPL_free(buf);

        goto fn_exit;

      }
#endif

        MPID_Segment seg;
        DLOOP_Offset last;

        MPID_Segment_init(rbuf, rcount, rdt, &seg, 0);
        last = sdata_sz;
        MPID_Segment_unpack(&seg, 0, &last, (char*)sbuf + sdt_true_lb);
        /* --BEGIN ERROR HANDLING-- */
        /* Fewer bytes consumed than offered: the types do not line up */
        if (last != sdata_sz)
        {
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

        *rsz = last;
    }
    else if (rdt_contig)
    {
        /* Non-contiguous source, contiguous destination: pack */
        MPID_Segment seg;
        DLOOP_Offset last;

        MPID_Segment_init(sbuf, scount, sdt, &seg, 0);
        last = sdata_sz;
        MPID_Segment_pack(&seg, 0, &last, (char*)rbuf + rdt_true_lb);
        /* --BEGIN ERROR HANDLING-- */
        if (last != sdata_sz)
        {
            *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */

        *rsz = last;
    }
    else
    {
        /* Neither side contiguous: stage the data through a bounded
         * intermediate buffer, packing from the source and unpacking into
         * the destination one piece at a time */
        char * buf;
        MPIDI_msg_sz_t buf_off;
        MPID_Segment sseg;
        MPIDI_msg_sz_t sfirst;
        MPID_Segment rseg;
        MPIDI_msg_sz_t rfirst;

        buf = MPL_malloc(MPIDI_COPY_BUFFER_SZ);
        /* --BEGIN ERROR HANDLING-- */
        if (buf == NULL)
        {
            *smpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, __FUNCTION__, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            *rmpi_errno = *smpi_errno;
            *rsz = 0;
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        MPID_Segment_init(sbuf, scount, sdt, &sseg, 0);
        MPID_Segment_init(rbuf, rcount, rdt, &rseg, 0);

        /* sfirst/rfirst: bytes packed so far / bytes unpacked so far;
         * buf_off: bytes carried over at the front of buf from the
         * previous iteration */
        sfirst = 0;
        rfirst = 0;
        buf_off = 0;

        for(;;)
        {
            DLOOP_Offset last;
            char * buf_end;

            /* Pack as much as fits behind the carried-over bytes */
            if (sdata_sz - sfirst > MPIDI_COPY_BUFFER_SZ - buf_off)
            {
                last = sfirst + (MPIDI_COPY_BUFFER_SZ - buf_off);
            }
            else
            {
                last = sdata_sz;
            }

            MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
            /* --BEGIN ERROR HANDLING-- */
            MPID_assert(last > sfirst);
            /* --END ERROR HANDLING-- */

            buf_end = buf + buf_off + (last - sfirst);
            sfirst = last;

            /* last (now equal to sfirst) is the upper bound for unpacking
             * and is updated to how far the unpack actually got */
            MPID_Segment_unpack(&rseg, rfirst, &last, buf);
            /* --BEGIN ERROR HANDLING-- */
            MPID_assert(last > rfirst);
            /* --END ERROR HANDLING-- */

            rfirst = last;

            if (rfirst == sdata_sz)
            {
                /* successful completion */
                break;
            }

            /* --BEGIN ERROR HANDLING-- */
            if (sfirst == sdata_sz)
            {
                /* datatype mismatch -- remaining bytes could not be unpacked */
                *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
                break;
            }
            /* --END ERROR HANDLING-- */

            /* Move the packed-but-not-yet-unpacked tail to the front of buf */
            buf_off = sfirst - rfirst;
            if (buf_off > 0)
            {
                memmove(buf, buf_end - buf_off, buf_off);
            }
        }

        *rsz = rfirst;
        MPL_free(buf);
    }

  fn_exit:
    return;
}
/**
 * \brief Set up optimized collectives for a newly created communicator.
 *
 * Returns immediately when optimized collectives are disabled or when
 * \a comm is not an intracommunicator.  Otherwise a collective function
 * table is allocated and, unless the communicator already uses the world
 * geometry, a PAMI geometry is created from either a single task range
 * (sequential LPIDs) or an explicit task list.  When no geometry results
 * (PAMI_GEOMETRY_NULL) the function table is freed again and the
 * communicator is left unoptimized.
 *
 * \param[in,out] comm The communicator being created.
 */
void MPIDI_Coll_comm_create(MPID_Comm *comm)
{
   /* Cleared by geom_create_cb_done (or directly below when no geometry is
    * requested); the progress loop spins on it. */
   volatile int geom_init = 1;
   int i;
   MPIDI_Post_geom_create_t geom_post;

   TRACE_ERR("MPIDI_Coll_comm_create enter\n");
   if (!MPIDI_Process.optimized.collectives)
      return;

   if(comm->comm_kind != MPID_INTRACOMM)
      return;

   /* Create a geometry */
   comm->coll_fns = MPIU_Calloc0(1, MPID_Collops);
   MPID_assert(comm->coll_fns != NULL);

   if(comm->mpid.geometry != MPIDI_Process.world_geometry)
   {
      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_0 && comm->rank == 0))
         fprintf(stderr,"world geom: %p parent geom: %p\n", MPIDI_Process.world_geometry, comm->mpid.parent);
      TRACE_ERR("Creating subgeom\n");
      /* Change to this at some point */

      comm->mpid.tasks = NULL;
      for(i=1; i<comm->local_size; i++)
      {
         /* only if sequential tasks should we use a (single) range.
            Multi or reordered ranges are inefficient */
         if(MPID_VCR_GET_LPID(comm->vcr, i) != (MPID_VCR_GET_LPID(comm->vcr, i-1) + 1))
         {
            /* not sequential, use tasklist */
            MPID_VCR_GET_LPIDS(comm, comm->mpid.tasks);
            break;
         }
      }

      /* Should we use a range? (no task list set) */
      if(comm->mpid.tasks == NULL)
      {
         /* one range, {first rank ... last rank} */
         comm->mpid.range.lo = MPID_VCR_GET_LPID(comm->vcr, 0);
         comm->mpid.range.hi = MPID_VCR_GET_LPID(comm->vcr, comm->local_size-1);
      }

      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_0 && comm->rank == 0))
         fprintf(stderr,"create geometry tasks %p {%u..%u}\n", comm->mpid.tasks, MPID_VCR_GET_LPID(comm->vcr, 0),MPID_VCR_GET_LPID(comm->vcr, comm->local_size-1));

      /* At most three entries are ever filled in below. */
      pami_configuration_t config[3];
      size_t numconfigs = 0;
#ifdef HAVE_PAMI_GEOMETRY_NONCONTIG
      config[0].name = PAMI_GEOMETRY_NONCONTIG;
      if(MPIDI_Process.optimized.memory & MPID_OPT_LVL_NONCONTIG)
         config[0].value.intval = 0; /* Disable non-contig, pamid doesn't use pami for non-contig data collectives */
      else
         config[0].value.intval = 1; /* Enable non-contig even though pamid doesn't use pami for non-contig data
                                        collectives, we still possibly want those collectives for other reasons. */
      ++numconfigs;
#endif
      if(MPIDI_Process.optimized.subcomms)
      {
         config[numconfigs].name = PAMI_GEOMETRY_OPTIMIZE;
         config[numconfigs].value.intval = 1;
         ++numconfigs;
      }
#ifdef HAVE_PAMI_GEOMETRY_MEMORY_OPTIMIZE
      if(MPIDI_Process.optimized.memory)
      {
         config[numconfigs].name = PAMI_GEOMETRY_MEMORY_OPTIMIZE;
         config[numconfigs].value.intval = MPIDI_Process.optimized.memory; /* level of optimization */
         ++numconfigs;
      }
#endif

      if((MPIDI_Process.optimized.memory & MPID_OPT_LVL_IRREG) && (comm->local_size & (comm->local_size-1)))
      {
         /* Don't create irregular geometries.  Fallback to MPICH only collectives */
         geom_init = 0;
         comm->mpid.geometry = PAMI_GEOMETRY_NULL;
      }
      else
      {
         /* Fields shared by the rangelist and tasklist requests. */
         geom_post.client = MPIDI_Client;
         geom_post.configs = config;
         geom_post.context_offset = 0; /* TODO BES investigate */
         geom_post.num_configs = numconfigs;
         geom_post.newgeom = &comm->mpid.geometry;
         geom_post.parent = PAMI_GEOMETRY_NULL;
         geom_post.id = comm->context_id;
         geom_post.fn = geom_create_cb_done;
         geom_post.cookie = (void*)&geom_init;

         if(comm->mpid.tasks == NULL)
         {
            /* Sequential tasks: describe the geometry with one range. */
            geom_post.ranges = &comm->mpid.range;
            geom_post.tasks = NULL;
            geom_post.count = (size_t)1;

            TRACE_ERR("%s geom_rangelist_create\n", MPIDI_Process.context_post>0?"Posting":"Invoking");
            MPIDI_Context_post(MPIDI_Context[0], &geom_post.state,
                               geom_rangelist_create_wrapper, (void *)&geom_post);
         }
         else
         {
            /* Non-sequential tasks: pass the explicit task list. */
            geom_post.ranges = NULL;
            geom_post.tasks = comm->mpid.tasks;
            geom_post.count = (size_t)comm->local_size;

            TRACE_ERR("%s geom_tasklist_create\n", MPIDI_Process.context_post>0?"Posting":"Invoking");
            MPIDI_Context_post(MPIDI_Context[0], &geom_post.state,
                               geom_tasklist_create_wrapper, (void *)&geom_post);
         }
      }

      TRACE_ERR("Waiting for geom create to finish\n");
      MPID_PROGRESS_WAIT_WHILE(geom_init);

      if(comm->mpid.geometry == PAMI_GEOMETRY_NULL)
      {
         if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_0 && comm->rank == 0))
            fprintf(stderr,"Created unoptimized communicator id=%u, size=%u\n", (unsigned) comm->context_id,comm->local_size);
         MPIU_TestFree(&comm->coll_fns);
         return;
      }
   }

   /* Initialize the async flow control in case it will be used. */
   comm->mpid.num_requests = MPIDI_Process.optimized.num_requests;

   TRACE_ERR("Querying protocols\n");
   /* Determine what protocols are available for this comm/geom */
   /* These two functions moved to mpid_collselect.c */
   MPIDI_Comm_coll_query(comm);
   MPIDI_Comm_coll_envvars(comm);
   if(MPIDI_Process.optimized.select_colls)
      MPIDI_Comm_coll_select(comm);

   TRACE_ERR("mpir barrier\n");
   int mpierrno = FALSE;
   /* Switch to comm->coll_fns->fn() */
   MPIDO_Barrier(comm, &mpierrno);

   TRACE_ERR("MPIDI_Coll_comm_create exit\n");
}
/**
 * \brief Record an unexpected (not yet posted) eager message.
 *
 * MSGQUEUE lock must be held by caller.
 *
 * The request returned by MPIDI_Recvq_AEU() is filled in from the message
 * header.  If \a recv is non-NULL the remaining data is directed into the
 * request's temporary buffer and completion is signalled through
 * MPIDI_RecvDoneCB_mutexed; otherwise the payload in \a sndbuf is copied
 * right away and the request is completed here.
 *
 * \param[in]  newreq   Pre-allocated request handed to MPIDI_Recvq_AEU()
 * \param[in]  context  The context on which the message is being received
 * \param[in]  msginfo  The message header
 * \param[in]  sndlen   Number of payload bytes
 * \param[in]  sender   The origin endpoint
 * \param[in]  sndbuf   Payload, used only when \a recv is NULL
 * \param[out] recv     PAMI receive descriptor to fill in, or NULL
 * \param[in]  isSync   Non-zero for a synchronous send
 */
void MPIDI_Callback_process_unexp(MPID_Request *newreq,
                                  pami_context_t context,
                                  const MPIDI_MsgInfo * msginfo,
                                  size_t sndlen,
                                  pami_endpoint_t sender,
                                  const void * sndbuf,
                                  pami_recv_t * recv,
                                  unsigned isSync)
{
  MPID_Request *rreq = NULL;

  /* ---------------------------------------------------- */
  /*  Fallback position:                                  */
  /*     + Request was not posted, or                     */
  /*     + Request was long & not contiguous.             */
  /*  We must allocate enough space to hold the message.  */
  /*  The temporary buffer will be unpacked later.        */
  /* ---------------------------------------------------- */
  unsigned rank = msginfo->MPIrank;
  unsigned tag = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_AEU(newreq, rank, tag, context_id);
#else
  unsigned msg_seqno = msginfo->MPIseqno;
  rreq = MPIDI_Recvq_AEU(newreq, rank, PAMIX_Endpoint_query(sender), tag, context_id, msg_seqno);
#endif

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
  MPIDI_Request_setCA (rreq, MPIDI_CA_COMPLETE);
  MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
  MPIDI_Request_setSync (rreq, isSync);

  /* Set the rank of the sender if a sync msg.  With out-of-order handling
     the peer rank is recorded unconditionally. */
#ifndef OUT_OF_ORDER_HANDLING
  if (isSync)
    {
#endif
      MPIDI_Request_setPeerRank_comm(rreq, rank);
      MPIDI_Request_setPeerRank_pami(rreq, PAMIX_Endpoint_query(sender));
#ifndef OUT_OF_ORDER_HANDLING
    }
#endif

  /* A zero-byte message is allowed to have no temporary buffer. */
  MPID_assert(!sndlen || rreq->mpid.uebuf != NULL);
  TRACE_MEMSET_R(PAMIX_Endpoint_query(sender),msg_seqno,recv_status);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),msgid,msginfo->MPIseqno);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rtag,tag);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rctx,msginfo->MPIctxt);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rlen,sndlen);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),fl.f.sync,isSync);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rsource,PAMIX_Endpoint_query(sender));
  TRACE_SET_REQ_VAL(rreq->mpid.idx,(msginfo->MPIseqno & SEQMASK));

  if (recv != NULL)
    {
      recv->local_fn = MPIDI_RecvDoneCB_mutexed;
      recv->cookie = rreq;
      /* -------------------------------------------------- */
      /*  Let PAMI know where to put the rest of the data.  */
      /* -------------------------------------------------- */
      recv->addr = rreq->mpid.uebuf;
    }
  else
    {
      /* ------------------------------------------------- */
      /*  We have the data; copy it and complete the msg.  */
      /* ------------------------------------------------- */
      /* Skip the copy for zero-byte messages: uebuf may be NULL then (see
         the assert above) and memcpy() with a null pointer is undefined
         even when the length is zero. */
      if (sndlen > 0)
        memcpy(rreq->mpid.uebuf, sndbuf, sndlen);
      MPIDI_RecvDoneCB(context, rreq, PAMI_SUCCESS);
      /* caller must release rreq, after unlocking MSGQUEUE */
    }
}
/**
 * \brief The callback for a new RZV RTS
 *
 * Matches the incoming rendezvous envelope against the receive queue via
 * MPIDI_Recvq_FDP_or_AEU(), stores the envelope contents in the resulting
 * request and, when a match was reported, starts the data transfer (or
 * completes the request directly for a zero-byte message).  A spare request
 * is created before the MSGQUEUE critical section is entered and is
 * discarded again if a match was found.
 *
 * \note Only the envelope has arrived at this point; for non-zero-byte
 *       messages the payload is fetched later by MPIDI_RendezvousTransfer().
 * \param[in]  context      The context on which the message is being received.
 * \param[in]  sender       The origin endpoint
 * \param[in]  _msginfo     The extended header information
 * \param[in]  msginfo_size The size of the extended header information
 * \param[in]  is_zero_byte The rendezvous message is zero bytes in length.
 */
void
MPIDI_RecvRzvCB_impl(pami_context_t context,
                     pami_endpoint_t sender,
                     const void * _msginfo,
                     size_t msginfo_size,
                     const unsigned is_zero_byte)
{
  MPID_assert(_msginfo != NULL);
  MPID_assert(msginfo_size == sizeof(MPIDI_MsgEnvelope));
  const MPIDI_MsgEnvelope * envelope = (const MPIDI_MsgEnvelope *)_msginfo;
  const MPIDI_MsgInfo * msginfo = (const MPIDI_MsgInfo *)&envelope->msginfo;

  MPID_Request * rreq = NULL;
  int found;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  int rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank = msginfo->MPIrank;
  unsigned tag = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  /* Allocated outside the critical section; handed to the queue lookup. */
  MPID_Request *newreq = MPIDI_Request_create2();
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, tag, context_id, &found);
#else
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, source, tag, context_id, msginfo->MPIseqno, &found);
#endif
  /* NOTE(review): this trace reads envelope->envelope.*, while the code
     below reads envelope->memregion / envelope->length -- confirm the trace
     still compiles when tracing is enabled. */
  TRACE_ERR("RZV CB for req=%p remote-mr=0x%llx bytes=%zu (%sfound)\n", rreq, *(unsigned long long*)&envelope->envelope.memregion, envelope->envelope.length, found?"":"not ");

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, envelope->length);
  MPIDI_Request_setPeerRank_comm(rreq, rank);
  MPIDI_Request_setPeerRank_pami(rreq, source);
  MPIDI_Request_cpyPeerRequestH (rreq, msginfo);
  MPIDI_Request_setSync (rreq, msginfo->isSync);
  MPIDI_Request_setRzv (rreq, 1);

  /* ----------------------------------------------------- */
  /* Save the rendezvous information for when the target   */
  /* node calls a receive function and the data is         */
  /* retrieved from the origin node.                       */
  /* ----------------------------------------------------- */
  if (is_zero_byte)
    {
      rreq->mpid.envelope.length = 0;
      rreq->mpid.envelope.data = NULL;
    }
  else
    {
#ifdef USE_PAMI_RDMA
      memcpy(&rreq->mpid.envelope.memregion, &envelope->memregion, sizeof(pami_memregion_t));
#else
      /* The memregion is only copied when the sender marked it as used. */
      rreq->mpid.envelope.memregion_used = envelope->memregion_used;
      if(envelope->memregion_used)
        {
          memcpy(&rreq->mpid.envelope.memregion, &envelope->memregion, sizeof(pami_memregion_t));
        }
      rreq->mpid.envelope.data = envelope->data;
#endif
      rreq->mpid.envelope.length = envelope->length;
      TRACE_SET_R_VAL(source,(rreq->mpid.idx),req,rreq);
      TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,envelope->length);
      TRACE_SET_R_VAL(source,(rreq->mpid.idx),fl.f.sync,msginfo->isSync);
      TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.rzv);
      if (TOKEN_FLOW_CONTROL_ON)
        {
#if TOKEN_FLOW_CONTROL
          MPIDI_Must_return_tokens(context,source);
#else
          /* Flow control requested at run time but not compiled in. */
          MPID_assert_always(0);
#endif
        }
    }

  /* ----------------------------------------- */
  /* figure out target buffer for request data */
  /* ----------------------------------------- */
  if (found)
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      /* --------------------------- */
      /* if synchronized, post ack.  */
      /* --------------------------- */
      if (unlikely(MPIDI_Request_isSync(rreq)))
        MPIDI_SyncAck_post(context, rreq, MPIDI_Request_getPeerRank_pami(rreq));

      /* The critical section is left before the transfer is started. */
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);

      if (is_zero_byte)
        MPIDI_RecvRzvDoneCB_zerobyte(context, rreq, PAMI_SUCCESS);
      else
        {
          MPIDI_RendezvousTransfer(context, rreq);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.sync_com_in_HH);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.matchedInHH);
          TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
        }
      /* The spare request is discarded on this path. */
      MPID_Request_discard(newreq);
    }

  /* ------------------------------------------------------------- */
  /* Request was not posted.                                       */
  /* ------------------------------------------------------------- */
  else
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      /*
       * This is to test that the fields don't need to be
       * initialized.  Remove after this doesn't fail for a while.
       */
      MPID_assert(rreq->mpid.uebuf == NULL);
      MPID_assert(rreq->mpid.uebuflen == 0);
      /* rreq->mpid.uebuf = NULL; */
      /* rreq->mpid.uebuflen = 0; */
#ifdef OUT_OF_ORDER_HANDLING
      /* Drain any messages from this source that were held back as out of order. */
      if (MPIDI_In_cntr[source].n_OutOfOrderMsgs > 0)
        {
          MPIDI_Recvq_process_out_of_order_msgs(source, context);
        }
#endif
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }

  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}
/**
 * \brief Broadcast using a PAMI collective when one is selected, otherwise
 *        fall back to MPIR_Bcast_intra().
 *
 * Messages larger than BCAST_LIMIT bytes are split into several recursive
 * broadcasts.  Non-contiguous datatypes are packed into a temporary byte
 * buffer on the root and unpacked with MPIR_Localcopy() on the other ranks.
 * The temporary buffer is released on every return path taken after it was
 * allocated, including the fallbacks to the MPICH algorithm.
 *
 * \param[in,out] buffer    Data to send (root) or receive (others)
 * \param[in]     count     Number of \a datatype elements
 * \param[in]     datatype  MPI datatype of the elements
 * \param[in]     root      Rank of the broadcasting process in \a comm_ptr
 * \param[in]     comm_ptr  Communicator
 * \param[out]    mpierrno  Passed through to the underlying broadcast
 * \return 0 after a PAMI broadcast, otherwise the return code of the
 *         recursive or fallback broadcast.
 */
int MPIDO_Bcast(void *buffer,
                int count,
                MPI_Datatype datatype,
                int root,
                MPID_Comm *comm_ptr,
                int *mpierrno)
{
   TRACE_ERR("in mpido_bcast\n");
   const size_t BCAST_LIMIT = 0x40000000;
   int data_contig, rc;
   void *data_buffer = NULL, *noncontig_buff = NULL;
   volatile unsigned active = 1;
   MPI_Aint data_true_lb = 0;
   MPID_Datatype *data_ptr;
   MPID_Segment segment;
   MPIDI_Post_coll_t bcast_post;
   const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
   const int rank = comm_ptr->rank;
#if ASSERT_LEVEL==0
   /* We can't afford the tracing in ndebug/performance libraries */
   const unsigned verbose = 0;
#else
   const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0);
#endif
   const int selected_type = mpid->user_selected_type[PAMI_XFER_BROADCAST];

   /* Must calculate data_size based on count=1 in case its total size is > integer */
   int data_size_one;
   MPIDI_Datatype_get_info(1, datatype,
                           data_contig, data_size_one, data_ptr, data_true_lb);
   /* do this calculation once and use twice */
   const size_t data_size_sz = (size_t)data_size_one*(size_t)count;
   if(unlikely(verbose))
      fprintf(stderr,"bcast count %d, size %d (%#zX), root %d, buffer %p\n",
              count,data_size_one, (size_t)data_size_one*(size_t)count, root,buffer);

   if(unlikely( data_size_sz > BCAST_LIMIT) )
   {
      /* Too large for one transfer: broadcast new_count elements at a time,
         then the remainder. */
      void *new_buffer=buffer;
      int c, new_count = (int)BCAST_LIMIT/data_size_one;
      MPID_assert(new_count > 0);

      for(c=1; ((size_t)c*(size_t)new_count) <= (size_t)count; ++c)
      {
         if ((rc = MPIDO_Bcast(new_buffer,
                               new_count,
                               datatype,
                               root,
                               comm_ptr,
                               mpierrno)) != MPI_SUCCESS)
            return rc;
         new_buffer = (char*)new_buffer + (size_t)data_size_one*(size_t)new_count;
      }
      new_count = count % new_count; /* 0 is ok, just returns no-op */
      return MPIDO_Bcast(new_buffer,
                         new_count,
                         datatype,
                         root,
                         comm_ptr,
                         mpierrno);
   }

   /* Must use data_size based on count for byte bcast processing.
      Previously calculated as a size_t but large data_sizes were
      handled above so this cast to int should be fine here. */
   const int data_size = (int)data_size_sz;

   if(selected_type == MPID_COLL_USE_MPICH || data_size == 0)
   {
      if(unlikely(verbose))
         fprintf(stderr,"Using MPICH bcast algorithm\n");
      MPIDI_Update_last_algorithm(comm_ptr,"BCAST_MPICH");
      return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
   }

   data_buffer = (char *)buffer + data_true_lb;

   if(!data_contig)
   {
      /* Stage non-contiguous data through a packed byte buffer. */
      noncontig_buff = MPIU_Malloc(data_size);
      data_buffer = noncontig_buff;
      if(noncontig_buff == NULL)
      {
         MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
                    "Fatal: Cannot allocate pack buffer");
      }
      if(rank == root)
      {
         DLOOP_Offset last = data_size;
         MPID_Segment_init(buffer, count, datatype, &segment, 0);
         MPID_Segment_pack(&segment, 0, &last, noncontig_buff);
      }
   }

   pami_xfer_t bcast;
   pami_algorithm_t my_bcast;
   const pami_metadata_t *my_md = (pami_metadata_t *)NULL;
   int queryreq = 0;

   bcast.cb_done = cb_bcast;
   bcast.cookie = (void *)&active;
   bcast.cmd.xfer_broadcast.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0);
   bcast.algorithm = mpid->user_selected[PAMI_XFER_BROADCAST];
   bcast.cmd.xfer_broadcast.buf = data_buffer;
   bcast.cmd.xfer_broadcast.type = PAMI_TYPE_BYTE;
   /* Needs to be sizeof(type)*count since we are using bytes as
    * the generic type */
   bcast.cmd.xfer_broadcast.typecount = data_size;

   if(selected_type == MPID_COLL_OPTIMIZED)
   {
      TRACE_ERR("Optimized bcast (%s) and (%s) were pre-selected\n",
                mpid->opt_protocol_md[PAMI_XFER_BROADCAST][0].name,
                mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1].name);

      if(mpid->cutoff_size[PAMI_XFER_BROADCAST][1] != 0)/* SSS: There is FCA cutoff (FCA only sets cutoff for [PAMI_XFER_BROADCAST][1]) */
      {
         if(data_size <= mpid->cutoff_size[PAMI_XFER_BROADCAST][1])
         {
            my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][1];
            my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1];
            queryreq = mpid->must_query[PAMI_XFER_BROADCAST][1];
         }
         else
         {
            /* The MPICH path works on the user buffer directly, so the
               pack buffer is no longer needed; release it before leaving. */
            if(noncontig_buff != NULL)
               MPIU_Free(noncontig_buff);
            return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
         }
      }

      if(data_size > mpid->cutoff_size[PAMI_XFER_BROADCAST][0])
      {
         my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][1];
         my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1];
         queryreq = mpid->must_query[PAMI_XFER_BROADCAST][1];
      }
      else
      {
         my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][0];
         my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][0];
         queryreq = mpid->must_query[PAMI_XFER_BROADCAST][0];
      }
   }
   else
   {
      TRACE_ERR("Bcast (%s) was specified by user\n",
                mpid->user_metadata[PAMI_XFER_BROADCAST].name);
      my_bcast = mpid->user_selected[PAMI_XFER_BROADCAST];
      my_md = &mpid->user_metadata[PAMI_XFER_BROADCAST];
      queryreq = selected_type;
   }

   bcast.algorithm = my_bcast;

   if(unlikely(queryreq == MPID_COLL_ALWAYS_QUERY ||
               queryreq == MPID_COLL_CHECK_FN_REQUIRED))
   {
      metadata_result_t result = {0};
      TRACE_ERR("querying bcast protocol %s, type was: %d\n",
                my_md->name, queryreq);
      if(my_md->check_fn != NULL) /* calling the check fn is sufficient */
      {
         /* Store into the outer 'result' so the bitmask test below sees the
            verdict; a second local declaration here used to shadow it and
            the check was silently ignored. */
         result = my_md->check_fn(&bcast);
         result.check.nonlocal = 0; /* #warning REMOVE THIS WHEN IMPLEMENTED */
      }
      else /* no check_fn, manually look at the metadata fields */
      {
         TRACE_ERR("Optimzed selection line %d\n",__LINE__);
         /* Check if the message range is restricted */
         if(my_md->check_correct.values.rangeminmax)
         {
            if((my_md->range_lo <= data_size) &&
               (my_md->range_hi >= data_size))
               ; /* ok, algorithm selected */
            else
            {
               result.check.range = 1;
               if(unlikely(verbose))
               {
                  fprintf(stderr,"message size (%u) outside range (%zu<->%zu) for %s.\n",
                          data_size,
                          my_md->range_lo,
                          my_md->range_hi,
                          my_md->name);
               }
            }
         }
         /* \todo check the rest of the metadata */
      }
      TRACE_ERR("bitmask: %#X\n", result.bitmask);
      if(result.bitmask)
      {
         if(unlikely(verbose))
            fprintf(stderr,"Using MPICH bcast algorithm - query fn failed\n");
         MPIDI_Update_last_algorithm(comm_ptr,"BCAST_MPICH");
         /* Release the pack buffer before falling back (see above). */
         if(noncontig_buff != NULL)
            MPIU_Free(noncontig_buff);
         return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
      }
      if(my_md->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
      {
         /* Request budget used up: reset it and synchronize with a barrier. */
         comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
         int tmpmpierrno;
         if(unlikely(verbose))
            fprintf(stderr,"Query barrier required for %s\n", my_md->name);
         MPIDO_Barrier(comm_ptr, &tmpmpierrno);
      }
   }

   if(unlikely(verbose))
   {
      unsigned long long int threadID;
      MPIU_Thread_id_t tid;
      MPIU_Thread_self(&tid);
      threadID = (unsigned long long int)tid;
      fprintf(stderr,"<%llx> Using protocol %s for bcast on %u\n",
              threadID,
              my_md->name,
              (unsigned) comm_ptr->context_id);
   }

   MPIDI_Context_post(MPIDI_Context[0], &bcast_post.state,
                      MPIDI_Pami_post_wrapper, (void *)&bcast);
   MPIDI_Update_last_algorithm(comm_ptr, my_md->name);
   /* cb_bcast is handed &active as its cookie; wait until it clears. */
   MPID_PROGRESS_WAIT_WHILE(active);
   TRACE_ERR("bcast done\n");

   if(!data_contig)
   {
      /* Non-root ranks unpack the received bytes into the user buffer. */
      if(rank != root)
         MPIR_Localcopy(noncontig_buff, data_size, MPI_CHAR,
                        buffer, count, datatype);
      MPIU_Free(noncontig_buff);
   }

   TRACE_ERR("leaving bcast\n");
   return 0;
}
/**
 * \brief Allocate an MPID_Win object and initialize its common fields.
 *
 * A window handle object is allocated, its device section is zeroed, a
 * reference is taken on \a comm_ptr, and a zeroed per-rank info array with
 * one entry per process of the communicator is attached.  Optional info
 * hints are applied through MPIDI_Win_set_info().
 *
 * \param[in]  length        Window size stored in win->size
 * \param[in]  disp_unit     Displacement unit
 * \param[out] win_ptr       Receives the new window object
 * \param[in]  info          Info hints, may be NULL or MPI_INFO_NULL
 * \param[in]  comm_ptr      Communicator the window is created on
 * \param[in]  create_flavor Stored in win->create_flavor
 * \param[in]  model         Stored in win->model
 * \return MPI_SUCCESS, or an error code if the window object could not be
 *         allocated.
 */
int
MPIDI_Win_init( MPI_Aint length,
                int disp_unit,
                MPID_Win **win_ptr,
                MPID_Info *info,
                MPID_Comm *comm_ptr,
                int create_flavor,
                int model)
{
  int mpi_errno=MPI_SUCCESS;
  size_t my_rank, nranks, info_bytes;
  MPIDI_Win_info *my_info;
  static char FCNAME[] = "MPIDI_Win_init";

  /* ----------------------------------------- */
  /*  Setup the common sections of the window  */
  /* ----------------------------------------- */
  MPID_Win *win = (MPID_Win*)MPIU_Handle_obj_alloc(&MPID_Win_mem);

  MPIU_ERR_CHKANDSTMT(win == NULL, mpi_errno, MPI_ERR_NO_MEM,
                      return mpi_errno, "**nomem");

  *win_ptr = win;
  memset(&win->mpid, 0, sizeof(struct MPIDI_Win));
  win->comm_ptr = comm_ptr;
  MPIR_Comm_add_ref(comm_ptr);

  nranks = comm_ptr->local_size;
  my_rank = comm_ptr->rank;

  /* One zero-initialized info slot per process of the communicator. */
  info_bytes = nranks * sizeof(struct MPIDI_Win_info);
  win->mpid.info = MPIU_Malloc(info_bytes);
  MPID_assert(win->mpid.info != NULL);
  memset((void *) win->mpid.info, 0, info_bytes);
  my_info = &win->mpid.info[my_rank];

  win->errhandler = NULL;
  win->base = NULL;
  win->size = length;
  win->disp_unit = disp_unit;
  win->create_flavor = create_flavor;
  win->model = model;
  win->copyCreateFlavor = 0;
  win->copyModel = 0;
  win->attributes = NULL;

  if ((info != NULL) && ((int *)info != (int *) MPI_INFO_NULL))
    {
      mpi_errno= MPIDI_Win_set_info(win, info);
    }
  MPID_assert(mpi_errno == 0);

  /* Initialize the info (hint) flags per window */
  /* NOTE(review): these defaults are written after MPIDI_Win_set_info();
     if that routine stores into win->mpid.info_args, the user's hints are
     overwritten here -- confirm the intended order. */
  win->mpid.info_args.no_locks = 0;
  win->mpid.info_args.accumulate_ordering =
      (MPIDI_ACCU_ORDER_RAR | MPIDI_ACCU_ORDER_RAW | MPIDI_ACCU_ORDER_WAR | MPIDI_ACCU_ORDER_WAW);
  win->mpid.info_args.accumulate_ops = MPIDI_ACCU_SAME_OP_NO_OP; /*default */
  win->mpid.info_args.same_size = 0;
  win->mpid.info_args.alloc_shared_noncontig = 0;

  win->copyDispUnit=0;
  win->copySize=0;

  /* Entry describing the calling process. */
  my_info->memregion_used = 0;
  my_info->disp_unit = disp_unit;

  return mpi_errno;
}
/**
 * \brief Handle a short (eager) message whose payload arrived with the header.
 *
 * Looks for a matching request with MPIDI_Recvq_FDP().  When none is found,
 * the MSGQUEUE critical section is left, a new request (and, for non-empty
 * messages, a temporary buffer) is allocated, and the lookup is repeated
 * inside the critical section; if it still fails the message is handed to
 * MPIDI_Callback_process_unexp().  When a request is found the status is
 * filled in and the payload is copied straight into the user buffer.
 *
 * \param[in] context  The context on which the message is being received
 * \param[in] _msginfo The message header (an MPIDI_MsgInfo)
 * \param[in] sndbuf   The payload
 * \param[in] sndlen   Number of payload bytes
 * \param[in] sender   The origin endpoint
 * \param[in] isSync   Non-zero for a synchronous send
 */
static inline void
MPIDI_RecvShortCB(pami_context_t context,
                  const void * _msginfo,
                  const void * sndbuf,
                  size_t sndlen,
                  pami_endpoint_t sender,
                  unsigned isSync)
{
  MPID_assert(_msginfo != NULL);

  const MPIDI_MsgInfo *msginfo = (const MPIDI_MsgInfo *)_msginfo;
  MPID_Request * rreq = NULL;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  int rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank = msginfo->MPIrank;
  unsigned tag = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
  rreq = MPIDI_Recvq_FDP(rank, source, tag, context_id, msginfo->MPIseqno);
#endif

  /* Match not found */
  if (unlikely(rreq == NULL))
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      /* The request and its buffer are allocated with the MSGQUEUE
         critical section released. */
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
      MPID_Request *newreq = MPIDI_Request_create2();
      MPID_assert(newreq != NULL);
      if (sndlen)
        {
          newreq->mpid.uebuflen = sndlen;
          if (!TOKEN_FLOW_CONTROL_ON)
            {
              newreq->mpid.uebuf = MPL_malloc(sndlen);
              newreq->mpid.uebuf_malloc = mpiuMalloc;
            }
          else
            {
#if TOKEN_FLOW_CONTROL
              /* MPIDI_mm_alloc() is called with MSGQUEUE held. */
              MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
              newreq->mpid.uebuf = MPIDI_mm_alloc(sndlen);
              newreq->mpid.uebuf_malloc = mpidiBufMM;
              MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#else
              /* Flow control requested at run time but not compiled in. */
              MPID_assert_always(0);
#endif
            }
          MPID_assert(newreq->mpid.uebuf != NULL);
        }
      /* Look again now that the critical section is held once more. */
      MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
#ifndef OUT_OF_ORDER_HANDLING
      rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
      rreq = MPIDI_Recvq_FDP(rank, PAMIX_Endpoint_query(sender), tag, context_id, msginfo->MPIseqno);
#endif

      if (unlikely(rreq == NULL))
        {
          /* recv is NULL, so the payload is copied and the request completed there. */
          MPIDI_Callback_process_unexp(newreq, context, msginfo, sndlen, sender, sndbuf, NULL, isSync);
          /* request is always complete now */
          if (TOKEN_FLOW_CONTROL_ON && sndlen)
            {
#if TOKEN_FLOW_CONTROL
              MPIDI_Token_cntr[source].unmatched++;
#else
              MPID_assert_always(0);
#endif
            }
          MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
          MPID_Request_release(newreq);
          goto fn_exit_short;
        }
      else
        {
          /* A request matched on the second lookup; the spare one is not needed. */
          MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
          MPID_Request_discard(newreq);
        }
    }
  else
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      if (TOKEN_FLOW_CONTROL_ON && sndlen)
        {
#if TOKEN_FLOW_CONTROL
          MPIDI_Update_rettoks(source);
          MPIDI_Must_return_tokens(context,source);
#else
          MPID_assert_always(0);
#endif
        }
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }

  /* the receive queue processing has been completed and we found a match */

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
  MPIDI_Request_setCA (rreq, MPIDI_CA_COMPLETE);
  MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
  MPIDI_Request_setSync (rreq, isSync);
  MPIDI_Request_setRzv (rreq, 0);

  /* ----------------------------- */
  /*  Request was already posted.  */
  /* ----------------------------- */
  if (unlikely(isSync))
    MPIDI_SyncAck_post(context, rreq, PAMIX_Endpoint_query(sender));

  /* Non-builtin datatypes are handled by a dedicated routine. */
  if (unlikely(HANDLE_GET_KIND(rreq->mpid.datatype) != HANDLE_KIND_BUILTIN))
    {
      MPIDI_Callback_process_userdefined_dt(context, sndbuf, sndlen, rreq);
      goto fn_exit_short;
    }

  size_t dt_size = rreq->mpid.userbufcount * MPID_Datatype_get_basic_size(rreq->mpid.datatype);

  /* ----------------------------- */
  /*  Test for truncated message.  */
  /* ----------------------------- */
  if (unlikely(sndlen > dt_size))
    {
#if ASSERT_LEVEL > 0
      MPIDI_Callback_process_trunc(context, rreq, NULL, sndbuf);
      goto fn_exit_short;
#else
      /* With assertions compiled out the copy is simply clamped. */
      sndlen = dt_size;
#endif
    }

  MPID_assert(rreq->mpid.uebuf == NULL);
  MPID_assert(rreq->mpid.uebuflen == 0);
  void* rcvbuf = rreq->mpid.userbuf;

  if (sndlen > 0)
    {
#if CUDA_AWARE_SUPPORT
      if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
        {
          /* The result of the copy is stored but not examined. */
          cudaError_t cudaerr = CudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
        }
      else
#endif
        memcpy(rcvbuf, sndbuf, sndlen);
    }
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,sndlen);
  TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.comp_in_HH);
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
  MPIDI_Request_complete(rreq);

 fn_exit_short:
#ifdef OUT_OF_ORDER_HANDLING
  /* Drain any messages from this source that were held back as out of order. */
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  if (MPIDI_In_cntr[source].n_OutOfOrderMsgs>0)
    {
      MPIDI_Recvq_process_out_of_order_msgs(source, context);
    }
  MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#endif

  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}
/**
 * \brief Issue the remaining contiguous chunks of a put with PAMI_Rput().
 *
 * Starting at req->state.index, one PAMI_Rput() is issued per entry of the
 * target datatype map.  The loop stops early with PAMI_EAGAIN when more than
 * MPIDI_Process.rma_pending operations are outstanding on the window, so
 * the call can be repeated later and resumes at the saved index.
 *
 * \param[in]     context The PAMI context to issue the operations on
 * \param[in,out] req     The window request; index and local offset advance
 * \param[out]    freed   Set to 1 once the final chunk has been issued
 * \return PAMI_SUCCESS when all chunks were issued, PAMI_EAGAIN when the
 *         pending limit was reached.
 */
static inline int
MPIDI_Put_use_pami_rput(pami_context_t context, MPIDI_Win_request * req,int *freed)
{
  pami_result_t rc;
  void *map;
  pami_rput_simple_t params;

  /* params need to zero out to avoid passing garbage to PAMI */
  params=zero_rput_parms;

  params.rma.dest=req->dest;
  params.rma.hints.buffer_registered = PAMI_HINT_ENABLE;
  params.rma.hints.use_rdma = PAMI_HINT_ENABLE;
  params.rma.bytes = 0;
  params.rma.cookie = req;
  params.rma.done_fn = NULL;
  params.rdma.local.mr=&req->origin.memregion;
  params.rdma.remote.mr=&req->win->mpid.info[req->target.rank].memregion;
  params.rdma.remote.offset= req->offset;
  params.put.rdone_fn= MPIDI_Win_DoneCB;

  struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
  TRACE_ERR("Start index=%u/%d l-addr=%p r-base=%p r-offset=%zu (sync->started=%u sync->complete=%u)\n",
            req->state.index, req->target.dt.num_contig, req->buffer, req->win->mpid.info[req->target.rank].base_addr, req->offset, sync->started, sync->complete);
  while (req->state.index < req->target.dt.num_contig) {
    /* Throttle: do not run further ahead of completions than rma_pending. */
    if (sync->started > sync->complete + MPIDI_Process.rma_pending)
      {
        TRACE_ERR("Bailing out; index=%u/%d sync->started=%u sync->complete=%u\n",
                  req->state.index, req->target.dt.num_contig, sync->started, sync->complete);
        return PAMI_EAGAIN;
      }
    ++sync->started;

    params.rma.bytes = req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
    params.rdma.remote.offset = req->offset + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;
    params.rdma.local.offset = req->state.local_offset;
#ifdef TRACE_ON
    unsigned* buf = (unsigned*)(req->buffer + params.rdma.local.offset);
#endif
    TRACE_ERR(" Sub index=%u bytes=%zu l-offset=%zu r-offset=%zu buf=%p *(int*)buf=0x%08x\n",
              req->state.index, params.rma.bytes, params.rdma.local.offset, params.rdma.remote.offset, buf, *buf);

    /* The last chunk is special: the map pointer is captured before the
       operation is issued and req is not touched afterwards.
       NOTE(review): presumably the completion callback may release req once
       the final operation is in flight, which is what *freed reports to the
       caller -- confirm against MPIDI_Win_DoneCB. */
    if (req->target.dt.num_contig - req->state.index == 1) {
      map=NULL;
      /* Only a heap-allocated map (not the embedded single entry) is freed. */
      if (req->target.dt.map != &req->target.dt.__map) {
        map=(void *) req->target.dt.map;
      }
      rc = PAMI_Rput(context, &params);
      MPID_assert(rc == PAMI_SUCCESS);
      if (map) {
        MPIU_Free(map);
      }
      *freed=1;
      return PAMI_SUCCESS;
    } else {
      rc = PAMI_Rput(context, &params);
      MPID_assert(rc == PAMI_SUCCESS);
      req->state.local_offset += params.rma.bytes;
      ++req->state.index;
    }
  }
  return PAMI_SUCCESS;
}