static int handler_recv_unpack_complete(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPIR_Request *const rreq = e->user_ptr; void *buf; MPI_Aint last; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE); MPIR_Assert(e->type == PTL_EVENT_REPLY || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); if (e->type == PTL_EVENT_PUT_OVERFLOW) buf = e->start; else buf = REQ_PTL(rreq)->chunk_buffer[0]; last = rreq->dev.segment_first + e->mlength; MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, buf); MPIR_Assert(last == rreq->dev.segment_first + e->mlength); mpi_errno = handler_recv_complete(e); if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE); return mpi_errno; fn_fail: goto fn_exit; }
/* Add description to an existing or new category * IN: cat_name, name of the category * IN: cat_desc, description of the category */ int MPIR_T_cat_add_desc(const char *cat_name, const char *cat_desc) { int cat_idx, mpi_errno = MPI_SUCCESS; name2index_hash_t *hash_entry; cat_table_entry_t *cat; /* NULL args are not allowed */ MPIR_Assert(cat_name); MPIR_Assert(cat_desc); MPL_HASH_FIND_STR(cat_hash, cat_name, hash_entry); if (hash_entry != NULL) { /* Found it, i.e., category already exists */ cat_idx = hash_entry->idx; cat = (cat_table_entry_t *)utarray_eltptr(cat_table, cat_idx); MPIR_Assert(cat->desc == NULL); cat->desc = MPL_strdup(cat_desc); MPIR_Assert(cat->desc); } else { /* Not found, so create a new category */ cat = MPIR_T_cat_create(cat_name); cat->desc = MPL_strdup(cat_desc); MPIR_Assert(cat->desc); /* Notify categories have been changed */ cat_stamp++; } return mpi_errno; }
static int handler_recv_big_get(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPIR_Request *const rreq = e->user_ptr; MPI_Aint last; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_UNPACK); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_UNPACK); MPIR_Assert(e->type == PTL_EVENT_REPLY); /* decrement the number of remaining gets */ REQ_PTL(rreq)->num_gets--; if (REQ_PTL(rreq)->num_gets == 0) { /* if we used a temporary buffer, unpack the data */ if (REQ_PTL(rreq)->chunk_buffer[0]) { last = rreq->dev.segment_size; MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, REQ_PTL(rreq)->chunk_buffer[0]); MPIR_Assert(last == rreq->dev.segment_size); } mpi_errno = handler_recv_complete(e); } if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_UNPACK); return mpi_errno; fn_fail: goto fn_exit; }
/* Completion handler for the send request of an RMA operation.
 *
 * Decrements the target's count of packets awaiting local completion (when
 * the request is tied to a target), decrements the global count of active
 * RMA requests, and completes the associated user request if there is one.
 * Returns MPI_SUCCESS or the error from MPID_Request_complete(). */
int MPIDI_CH3_Req_handler_rma_op_complete(MPIR_Request * sreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Request *ureq = NULL;
    MPIR_Win *win_ptr = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);

    if (sreq->dev.rma_target_ptr != NULL) {
        sreq->dev.rma_target_ptr->num_pkts_wait_for_local_completion--;
    }

    /* The window is looked up only to check that the handle resolves. */
    MPIR_Win_get_ptr(sreq->dev.source_win_handle, win_ptr);
    MPIR_Assert(win_ptr != NULL);

    /* one fewer active RMA request */
    MPIDI_CH3I_RMA_Active_req_cnt--;
    MPIR_Assert(MPIDI_CH3I_RMA_Active_req_cnt >= 0);

    if (sreq->dev.request_handle == MPI_REQUEST_NULL) {
        /* no user-level request is attached to this operation */
        goto fn_exit;
    }

    /* complete the user request attached to this operation */
    MPIR_Request_get_ptr(sreq->dev.request_handle, ureq);
    mpi_errno = MPID_Request_complete(ureq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* Nonblocking neighbor alltoallw: obtains a schedule tag, creates a schedule,
 * lets the communicator's Ineighbor_alltoallw hook fill it in, and starts it.
 *
 * OUT: request, set to MPI_REQUEST_NULL on entry and to the handle of the
 *      started request whenever MPIR_Sched_start() produced one.
 * Returns MPI_SUCCESS or the first error encountered. */
int MPIR_Ineighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[],
                                  const MPI_Aint sdispls[], const MPI_Datatype sendtypes[],
                                  void *recvbuf, const int recvcounts[],
                                  const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
                                  MPIR_Comm *comm_ptr, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    int tag = -1;
    MPIR_Request *reqp = NULL;
    MPIR_Sched_t s = MPIR_SCHED_NULL;

    *request = MPI_REQUEST_NULL;

    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    mpi_errno = MPIR_Sched_create(&s);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* The communicator must provide the schedule-building hook. */
    MPIR_Assert(comm_ptr->coll_fns != NULL);
    MPIR_Assert(comm_ptr->coll_fns->Ineighbor_alltoallw != NULL);

    mpi_errno = comm_ptr->coll_fns->Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes,
                                                        recvbuf, recvcounts, rdispls, recvtypes,
                                                        comm_ptr, s);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
    /* Publish the handle before looking at the error code, so the caller
     * sees any request that was created. */
    if (reqp != NULL) {
        *request = reqp->handle;
    }
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*@
  MPIDI_CH3U_VC_SendClose - Initiate a close on a virtual connection

Input Parameters:
+ vc - Virtual connection to close
- rank - rank of virtual connection within a process group (used for debugging)

Notes:
  The current state of this connection must be either 'MPIDI_VC_STATE_ACTIVE'
  or 'MPIDI_VC_STATE_REMOTE_CLOSE'.  An active connection moves to the
  LOCAL_CLOSE state and sends a close packet with ack set to FALSE; a
  remotely closed connection moves to CLOSE_ACKED and sends ack set to TRUE.
@*/
int MPIDI_CH3U_VC_SendClose( MPIDI_VC_t *vc, int rank )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_close_t * close_pkt = &upkt.close;
    MPIR_Request * sreq;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);

    MPIR_Assert( vc->state == MPIDI_VC_STATE_ACTIVE ||
                 vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );

    MPIDI_Pkt_init(close_pkt, MPIDI_CH3_PKT_CLOSE);
    /* Anything other than ACTIVE here is REMOTE_CLOSE, in which case this
     * packet acknowledges the peer's close. */
    close_pkt->ack = (vc->state != MPIDI_VC_STATE_ACTIVE) ? TRUE : FALSE;

    /* MT: this is not thread safe, the POBJ CS is scoped to the vc and
     * doesn't protect this global correctly */
    MPIDI_Outstanding_close_ops += 1;
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,(MPL_DBG_FDEST,
                    "sending close(%s) on vc (pg=%p) %p to rank %d, ops = %d",
                    close_pkt->ack ? "TRUE" : "FALSE", vc->pg, vc,
                    rank, MPIDI_Outstanding_close_ops));

    /*
     * A close packet acknowledging this close request could be
     * received during iStartMsg, therefore the state must
     * be changed before the close packet is sent.
     */
    if (vc->state != MPIDI_VC_STATE_ACTIVE) {
        MPIR_Assert( vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );
        MPIDI_CHANGE_VC_STATE(vc, CLOSE_ACKED);
    }
    else {
        MPIDI_CHANGE_VC_STATE(vc, LOCAL_CLOSE);
    }

    mpi_errno = MPIDI_CH3_iStartMsg(vc, close_pkt, sizeof(*close_pkt), &sreq);
    MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
                        "**ch3|send_close_ack");

    if (sreq != NULL) {
        /* There is still another reference being held by the channel.  It
           will not be released until the pkt is actually sent. */
        MPIR_Request_free(sreq);
    }

  fn_exit:
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* A low level, generic and internally used interface to register * a pvar to MPIR_T. Other modules should use interfaces defined * for concrete pvar classes. * * IN: varclass, MPI_T_PVAR_CLASS_* * IN: dtype, MPI datatype for this pvar * IN: name, Name of the pvar * IN: addr, Pointer to the pvar if known at registeration, otherwise NULL. * IN: count, # of elements of this pvar if known at registeration, otherwise 0. * IN: etype, MPI_T_enum or MPI_T_ENUM_NULL * IN: verb, MPI_T_PVAR_VERBOSITY_* * IN: binding, MPI_T_BIND_* * IN: flags, Bitwise OR of MPIR_T_R_PVAR_FLAGS_{} * IN: get_value, If not NULL, it is a callback to read the pvar. * IN: get_count, If not NULL, it is a callback to read count of the pvar. * IN: cat, Catogery name of the pvar * IN: desc, Description of the pvar */ void MPIR_T_PVAR_REGISTER_impl( int varclass, MPI_Datatype dtype, const char* name, void *addr, int count, MPIR_T_enum_t *etype, int verb, int binding, int flags, MPIR_T_pvar_get_value_cb get_value, MPIR_T_pvar_get_count_cb get_count, const char * cat, const char * desc) { name2index_hash_t *hash_entry; pvar_table_entry_t *pvar; int pvar_idx; int seq = varclass - MPIR_T_PVAR_CLASS_FIRST; /* Check whether this is a replicated pvar, whose name is unique per class */ MPL_HASH_FIND_STR(pvar_hashs[seq], name, hash_entry); if (hash_entry != NULL) { /* Found it, the pvar already exists */ pvar_idx = hash_entry->idx; pvar = (pvar_table_entry_t *)utarray_eltptr(pvar_table, pvar_idx); /* Should never override an existing & active var */ MPIR_Assert(pvar->active != TRUE); pvar->active = TRUE; /* FIXME: Do we need to check consistency between the old and new? 
*/ } else { /* Not found, so push the pvar to back of pvar_table */ utarray_extend_back(pvar_table); pvar = (pvar_table_entry_t *)utarray_back(pvar_table); pvar->active = TRUE; pvar->varclass = varclass; pvar->datatype = dtype; pvar->name = MPL_strdup(name); MPIR_Assert(pvar->name); pvar->addr = addr; pvar->count = count; pvar->enumtype = etype; pvar->verbosity = verb; pvar->bind = binding; pvar->flags = flags; pvar->get_value = get_value; pvar->get_count = get_count; pvar->desc = MPL_strdup(desc); MPIR_Assert(pvar->desc); /* Record <name, index> in hash table */ pvar_idx = utarray_len(pvar_table) - 1; hash_entry = MPL_malloc(sizeof(name2index_hash_t)); MPIR_Assert(hash_entry); /* Need not to Strdup name, since pvar_table and pvar_hashs co-exist */ hash_entry->name = name; hash_entry->idx = pvar_idx; MPL_HASH_ADD_KEYPTR(hh, pvar_hashs[seq], hash_entry->name, strlen(hash_entry->name), hash_entry); /* Add the pvar to a category */ MPIR_T_cat_add_pvar(cat, utarray_len(pvar_table)-1); } }
int MPIR_Ireduce_sched_intra_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm * comm_ptr, MPIR_Sched_t s) { int mpi_errno = MPI_SUCCESS; int comm_size, rank, is_commutative; int mask, relrank, source, lroot; MPI_Aint true_lb, true_extent, extent; void *tmp_buf; MPIR_SCHED_CHKPMEM_DECL(2); MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM); if (count == 0) return MPI_SUCCESS; comm_size = comm_ptr->local_size; rank = comm_ptr->rank; /* set op_errno to 0. stored in perthread structure */ { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); per_thread->op_errno = 0; } /* Create a temporary buffer */ MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPIR_Datatype_get_extent_macro(datatype, extent); is_commutative = MPIR_Op_is_commutative(op); /* I think this is the worse case, so we can avoid an assert() * inside the for loop */ /* should be buf+{this}? */ MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent)); MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count * (MPL_MAX(extent, true_extent)), mpi_errno, "temporary buffer", MPL_MEM_BUFFER); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *) ((char *) tmp_buf - true_lb); /* If I'm not the root, then my recvbuf may not be valid, therefore * I have to allocate a temporary one */ if (rank != root) { MPIR_SCHED_CHKPMEM_MALLOC(recvbuf, void *, count * (MPL_MAX(extent, true_extent)), mpi_errno, "receive buffer", MPL_MEM_BUFFER); recvbuf = (void *) ((char *) recvbuf - true_lb); }
/* Maps rank r in comm_ptr to the rank of the leader for r's node in
 * comm_ptr->node_roots_comm and returns this value.
 *
 * comm_ptr must be a valid intracommunicator with an internode table, and r
 * must satisfy 0 <= r < comm_ptr->remote_size; all of this is asserted.
 *
 * This function does NOT use mpich error handling. */
int MPIR_Get_internode_rank(MPIR_Comm * comm_ptr, int r)
{
    int mpi_errno = MPI_SUCCESS;

    MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, TRUE);
    MPIR_Assert(mpi_errno == MPI_SUCCESS);

    /* r indexes the table directly, so check both ends of the range: a
     * negative rank would read memory before the table. */
    MPIR_Assert(r >= 0);
    MPIR_Assert(r < comm_ptr->remote_size);
    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
    MPIR_Assert(comm_ptr->internode_table != NULL);

    return comm_ptr->internode_table[r];
}
/* Maps rank r in comm_ptr to the rank in comm_ptr->node_comm or -1 if r is
 * not a member of comm_ptr->node_comm.
 *
 * comm_ptr must be a valid intracommunicator with an intranode table, and r
 * must satisfy 0 <= r < comm_ptr->remote_size; all of this is asserted.
 *
 * This function does NOT use mpich error handling. */
int MPIR_Get_intranode_rank(MPIR_Comm * comm_ptr, int r)
{
    int mpi_errno = MPI_SUCCESS;

    MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, TRUE);
    MPIR_Assert(mpi_errno == MPI_SUCCESS);

    /* r indexes the table directly, so check both ends of the range: a
     * negative rank would read memory before the table. */
    MPIR_Assert(r >= 0);
    MPIR_Assert(r < comm_ptr->remote_size);
    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
    MPIR_Assert(comm_ptr->intranode_table != NULL);

    /* FIXME this could/should be a list of ranks on the local node, which
     * should take up much less space on a typical thin(ish)-node system. */
    return comm_ptr->intranode_table[r];
}
/* Append one (name, value) item to an existing enum.
 * IN: handle, handle to the enum (must not be NULL)
 * IN: item_name, name of the item (must not be NULL); the enum stores its
 *     own copy
 * IN: item_value, value associated with item_name
 */
void MPIR_T_enum_add_item(MPI_T_enum handle, const char *item_name, int item_value)
{
    enum_item_t *new_item;

    MPIR_Assert(handle);
    MPIR_Assert(item_name);

    /* Grow the item array by one slot and fill in that last slot. */
    utarray_extend_back(handle->items);
    new_item = (enum_item_t *)utarray_back(handle->items);

    new_item->name = MPL_strdup(item_name);
    MPIR_Assert(new_item->name);
    new_item->value = item_value;
}
int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s) { int mpi_errno = MPI_SUCCESS; int rank = comm_ptr->rank; MPIR_Comm *node_comm; MPIR_Comm *roots_comm; MPI_Aint true_extent, true_lb, extent; void *tempbuf = NULL; void *prefulldata = NULL; void *localfulldata = NULL; MPIR_SCHED_CHKPMEM_DECL(3); /* In order to use the SMP-aware algorithm, the "op" can be either commutative or non-commutative, but we require a communicator in which all the nodes contain processes with consecutive ranks. */ if (!MPII_Comm_is_node_consecutive(comm_ptr)) { /* We can't use the SMP-aware algorithm, use the generic one */ return MPIR_Iscan_rec_dbl(sendbuf, recvbuf, count, datatype, op, comm_ptr, s); } node_comm = comm_ptr->node_comm; roots_comm = comm_ptr->node_roots_comm; if (node_comm) { MPIR_Assert(node_comm->coll_fns && node_comm->coll_fns->Iscan_sched && node_comm->coll_fns->Ibcast_sched); } if (roots_comm) { MPIR_Assert(roots_comm->coll_fns && roots_comm->coll_fns->Iscan_sched); } MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPID_Datatype_get_extent_macro(datatype, extent); MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent)); MPIR_SCHED_CHKPMEM_MALLOC(tempbuf, void *, count*(MPL_MAX(extent, true_extent)), mpi_errno, "temporary buffer"); tempbuf = (void *)((char*)tempbuf - true_lb); /* Create prefulldata and localfulldata on local roots of all nodes */ if (comm_ptr->node_roots_comm != NULL) { MPIR_SCHED_CHKPMEM_MALLOC(prefulldata, void *, count*(MPL_MAX(extent, true_extent)), mpi_errno, "prefulldata for scan"); prefulldata = (void *)((char*)prefulldata - true_lb); if (node_comm != NULL) { MPIR_SCHED_CHKPMEM_MALLOC(localfulldata, void *, count*(MPL_MAX(extent, true_extent)), mpi_errno, "localfulldata for scan"); localfulldata = (void *)((char*)localfulldata - true_lb); }
int MPIDI_check_for_failed_procs(void) { int mpi_errno = MPI_SUCCESS; int pmi_errno; int len; char *kvsname = MPIDI_global.jobid; char *failed_procs_string = NULL; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CHECK_FOR_FAILED_PROCS); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CHECK_FOR_FAILED_PROCS); /* FIXME: Currently this only handles failed processes in * comm_world. We need to fix hydra to include the pgid along * with the rank, then we need to create the failed group from * something bigger than comm_world. */ #ifdef USE_PMIX_API MPIR_Assert(0); #elif defined(USE_PMI2_API) { int vallen = 0; len = PMI2_MAX_VALLEN; failed_procs_string = MPL_malloc(len, MPL_MEM_OTHER); MPIR_Assert(failed_procs_string); pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes", failed_procs_string, len, &vallen); MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get"); MPL_free(failed_procs_string); } #else pmi_errno = PMI_KVS_Get_value_length_max(&len); MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max"); failed_procs_string = MPL_malloc(len, MPL_MEM_OTHER); MPIR_Assert(failed_procs_string); pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", failed_procs_string, len); MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get"); MPL_free(failed_procs_string); #endif MPL_DBG_MSG_FMT(MPIDI_CH4_DBG_GENERAL, VERBOSE, (MPL_DBG_FDEST, "Received proc fail notification: %s", failed_procs_string)); /* FIXME: handle ULFM failed groups here */ fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CHECK_FOR_FAILED_PROCS); return mpi_errno; fn_fail: MPL_free(failed_procs_string); goto fn_exit; }
static int handler_recv_dequeue_unpack_large(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPIR_Request *const rreq = e->user_ptr; MPIDI_VC_t *vc; MPI_Aint last; void *buf; MPIR_CHKPMEM_DECL(1); MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE); MPIR_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc); dequeue_req(e); if (!(e->hdr_data & NPTL_LARGE)) { /* all data has already been received; we're done */ mpi_errno = handler_recv_unpack_complete(e); if (mpi_errno) MPIR_ERR_POP(mpi_errno); goto fn_exit; } if (e->type == PTL_EVENT_PUT_OVERFLOW) buf = e->start; else buf = REQ_PTL(rreq)->chunk_buffer[0]; MPIR_Assert(e->mlength == PTL_LARGE_THRESHOLD); last = PTL_LARGE_THRESHOLD; MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, buf); MPIR_Assert(last == PTL_LARGE_THRESHOLD); rreq->dev.segment_first += PTL_LARGE_THRESHOLD; MPL_free(REQ_PTL(rreq)->chunk_buffer[0]); MPIR_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, rreq->dev.segment_size - rreq->dev.segment_first, mpi_errno, "chunk_buffer"); big_get(REQ_PTL(rreq)->chunk_buffer[0], rreq->dev.segment_size - rreq->dev.segment_first, vc, e->match_bits, rreq); fn_exit: MPIR_CHKPMEM_COMMIT(); fn_exit2: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE); return mpi_errno; fn_fail: MPIR_CHKPMEM_REAP(); goto fn_exit2; }
static inline void create_dt_map() { int i, j; size_t dtsize[FI_DATATYPE_LAST]; dtsize[FI_INT8] = sizeof(int8_t); dtsize[FI_UINT8] = sizeof(uint8_t); dtsize[FI_INT16] = sizeof(int16_t); dtsize[FI_UINT16] = sizeof(uint16_t); dtsize[FI_INT32] = sizeof(int32_t); dtsize[FI_UINT32] = sizeof(uint32_t); dtsize[FI_INT64] = sizeof(int64_t); dtsize[FI_UINT64] = sizeof(uint64_t); dtsize[FI_FLOAT] = sizeof(float); dtsize[FI_DOUBLE] = sizeof(double); dtsize[FI_FLOAT_COMPLEX] = sizeof(float complex); dtsize[FI_DOUBLE_COMPLEX] = sizeof(double complex); dtsize[FI_LONG_DOUBLE] = sizeof(long double); dtsize[FI_LONG_DOUBLE_COMPLEX] = sizeof(long double complex); /* when atomics are disabled and atomics capability are not * enabled call fo fi_atomic*** may crash */ MPIR_Assert(MPIDI_OFI_ENABLE_ATOMICS); for (i = 0; i < MPIDI_OFI_DT_SIZES; i++) for (j = 0; j < MPIDI_OFI_OP_SIZES; j++) { enum fi_datatype fi_dt = (enum fi_datatype) -1; enum fi_op fi_op = (enum fi_op) -1; mpi_to_ofi(mpi_dtypes[i], &fi_dt, mpi_ops[j], &fi_op); MPIR_Assert(fi_dt != (enum fi_datatype) -1); MPIR_Assert(fi_op != (enum fi_op) -1); _TBL.dt = fi_dt; _TBL.op = fi_op; _TBL.atomic_valid = 0; _TBL.max_atomic_count = 0; _TBL.max_fetch_atomic_count = 0; _TBL.max_compare_atomic_count = 0; _TBL.mpi_acc_valid = check_mpi_acc_valid(mpi_dtypes[i], mpi_ops[j]); ssize_t ret; size_t atomic_count; if (fi_dt != FI_DATATYPE_LAST && fi_op != FI_ATOMIC_OP_LAST) { CHECK_ATOMIC(fi_atomicvalid, atomic_valid, max_atomic_count); CHECK_ATOMIC(fi_fetch_atomicvalid, fetch_atomic_valid, max_fetch_atomic_count); CHECK_ATOMIC(fi_compare_atomicvalid, compare_atomic_valid, max_compare_atomic_count); _TBL.dtsize = dtsize[fi_dt]; } } }
/* Report the maximum node id.
 *
 * Providing a comm argument permits optimization, but this function is
 * always allowed to return the max for the universe.  This implementation
 * ignores comm and reports the global maximum held in g_max_node_id.
 * Always returns MPI_SUCCESS. */
int MPID_Get_max_node_id(MPIR_Comm *comm, MPID_Node_id_t *max_id_p)
{
    /* easiest way to implement this is to track it at PG create/destroy time */
    MPID_Node_id_t *const out = max_id_p;

    *out = g_max_node_id;
    MPIR_Assert(*out >= 0);

    return MPI_SUCCESS;
}
/* Advance a send request after data has been sent.
 *
 * If the request carries an OnDataAvail callback, that callback decides what
 * happens next and sets *complete.  Without a callback the request is
 * completed here and *complete is set to 1.
 * Returns MPI_SUCCESS or the error reported by the callback or by
 * MPID_Request_complete(). */
int MPIDI_CH3U_Handle_send_req(MPIDI_VC_t * vc, MPIR_Request * sreq, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*on_data_avail) (MPIDI_VC_t *, MPIR_Request *, int *);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    /* Use the associated function rather than switching on the old ca field */
    /* Routines can call the attached function directly */
    on_data_avail = sreq->dev.OnDataAvail;

    if (on_data_avail != NULL) {
        mpi_errno = on_data_avail(vc, sreq, complete);
    } else {
        /* No callback: the request is simply done.  GET_RESP requests are
         * asserted never to reach this branch. */
        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        mpi_errno = MPID_Request_complete(sreq);
        *complete = 1;
    }

    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* Format a human-readable breakdown of a context id into out_str.
 *
 * IN:  context_id, the context id to decode
 * OUT: out_str, destination buffer
 * IN:  len, size of out_str in bytes (passed to MPL_snprintf)
 *
 * The SUBCOMM field is printed by name; a value outside 0..2 trips an
 * assertion and, when assertions are compiled out, is printed as "unknown". */
static void dump_context_id(MPIR_Context_id_t context_id, char *out_str, int len)
{
    int subcomm_type = MPIR_CONTEXT_READ_FIELD(SUBCOMM, context_id);
    const char *subcomm_type_name = NULL;

    switch (subcomm_type) {
        case 0:
            subcomm_type_name = "parent";
            break;
        case 1:
            subcomm_type_name = "intranode";
            break;
        case 2:
            subcomm_type_name = "internode";
            break;
        default:
            MPIR_Assert(FALSE);
            /* Never hand a NULL pointer to "%s" below if the assertion is
             * compiled out. */
            subcomm_type_name = "unknown";
            break;
    }

    MPL_snprintf(out_str, len,
                 "context_id=%d (%#x): DYNAMIC_PROC=%d PREFIX=%#x IS_LOCALCOMM=%d SUBCOMM=%s SUFFIX=%s",
                 context_id,
                 context_id,
                 MPIR_CONTEXT_READ_FIELD(DYNAMIC_PROC, context_id),
                 MPIR_CONTEXT_READ_FIELD(PREFIX, context_id),
                 MPIR_CONTEXT_READ_FIELD(IS_LOCALCOMM, context_id),
                 subcomm_type_name,
                 (MPIR_CONTEXT_READ_FIELD(SUFFIX, context_id) ? "coll" : "pt2pt"));
}
/* Compute how many elements of `datatype` the byte count stored in `status`
 * represents and store the result in *count.
 *
 * *count is MPI_UNDEFINED when the byte count is not a whole multiple of the
 * datatype size, when the element count does not fit in an int, or when the
 * datatype size is 0 but the byte count is positive.  It is 0 when both the
 * datatype size and the byte count are 0. */
void MPIR_Get_count_impl(const MPI_Status *status, MPI_Datatype datatype, int *count)
{
    MPI_Count size;

    MPID_Datatype_get_size_macro(datatype, size);

    MPIR_Assert(size >= 0 && MPIR_STATUS_GET_COUNT(*status) >= 0);

    if (size == 0) {
        if (MPIR_STATUS_GET_COUNT(*status) > 0) {
            /* --BEGIN ERROR HANDLING-- */

            /* case where datatype size is 0 and count is > 0 should
             * never occur.
             */

            (*count) = MPI_UNDEFINED;

            /* --END ERROR HANDLING-- */
        } else {
            /* This is ambiguous.  However, discussions on MPI Forum
               reached a consensus that this is the correct return
               value
             */
            (*count) = 0;
        }
        return;
    }

    /* MPI-3 says return MPI_UNDEFINED if too large for an int */
    if ((MPIR_STATUS_GET_COUNT(*status) % size) != 0 ||
        ((MPIR_STATUS_GET_COUNT(*status) / size) > INT_MAX)) {
        (*count) = MPI_UNDEFINED;
    } else {
        (*count) = (int)(MPIR_STATUS_GET_COUNT(*status) / size);
    }
}
/* Blocking neighbor alltoall: forwards all arguments to the communicator's
 * Neighbor_alltoall collective hook, which must be installed (asserted).
 * Returns MPI_SUCCESS or the error reported by the hook. */
int MPIR_Neighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                MPIR_Comm *comm_ptr)
{
    int mpi_errno = MPI_SUCCESS;

    /* The communicator must carry a collective function table with this hook. */
    MPIR_Assert(comm_ptr->coll_fns != NULL);
    MPIR_Assert(comm_ptr->coll_fns->Neighbor_alltoall != NULL);

    mpi_errno = comm_ptr->coll_fns->Neighbor_alltoall(sendbuf, sendcount, sendtype,
                                                      recvbuf, recvcount, recvtype,
                                                      comm_ptr);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* Blocking neighbor alltoallw: forwards all arguments to the communicator's
 * Neighbor_alltoallw collective hook, which must be installed (asserted).
 * Returns MPI_SUCCESS or the error reported by the hook. */
int MPIR_Neighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[],
                                 const MPI_Aint sdispls[], const MPI_Datatype sendtypes[],
                                 void *recvbuf, const int recvcounts[],
                                 const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
                                 MPIR_Comm *comm_ptr)
{
    int mpi_errno = MPI_SUCCESS;

    /* The communicator must carry a collective function table with this hook. */
    MPIR_Assert(comm_ptr->coll_fns != NULL);
    MPIR_Assert(comm_ptr->coll_fns->Neighbor_alltoallw != NULL);

    mpi_errno = comm_ptr->coll_fns->Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes,
                                                       recvbuf, recvcounts, rdispls, recvtypes,
                                                       comm_ptr);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* Active-message handler for OFI control messages.
 *
 * The header (am_hdr) is read as an MPIDI_OFI_send_control_t and dispatched
 * on its type: HUGEACK goes to MPIDI_OFI_dispatch_function() with the
 * acknowledged request, HUGE goes to MPIDI_OFI_get_huge().  Any other type
 * is reported on stderr and trips an assertion.
 *
 * *req and *target_cmpl_cb are always set to NULL; the remaining parameters
 * are not used here.  Returns the error code of the dispatched routine. */
int MPIDI_OFI_control_handler(int handler_id, void *am_hdr, void **data, size_t * data_sz,
                              int is_local, int *is_contig,
                              MPIDIG_am_target_cmpl_cb * target_cmpl_cb, MPIR_Request ** req)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_OFI_send_control_t *ctrlsend = (MPIDI_OFI_send_control_t *) am_hdr;

    *req = NULL;
    *target_cmpl_cb = NULL;

    switch (ctrlsend->type) {
        case MPIDI_OFI_CTRL_HUGEACK:
            mpi_errno = MPIDI_OFI_dispatch_function(NULL, ctrlsend->ackreq);
            break;

        case MPIDI_OFI_CTRL_HUGE:
            mpi_errno = MPIDI_OFI_get_huge(ctrlsend);
            break;

        default:
            fprintf(stderr, "Bad control type: 0x%08x %d\n", ctrlsend->type, ctrlsend->type);
            MPIR_Assert(0);
    }

    return mpi_errno;
}
/* MPII_Dataloop_stackelm_offset - returns starting offset (displacement) for stackelm * based on current count in stackelm. * * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct * before this is called! * * also, this really is only good at init time for vectors and contigs * (all the time for indexed) at the moment. * */ MPI_Aint MPII_Dataloop_stackelm_offset(struct MPII_Dataloop_stackelm * elmp) { struct MPIR_Dataloop *dlp = elmp->loop_p; switch (dlp->kind & MPII_DATALOOP_KIND_MASK) { case MPII_DATALOOP_KIND_VECTOR: case MPII_DATALOOP_KIND_CONTIG: return 0; break; case MPII_DATALOOP_KIND_BLOCKINDEXED: return dlp->loop_params.bi_t.offset_array[elmp->orig_count - elmp->curcount]; break; case MPII_DATALOOP_KIND_INDEXED: return dlp->loop_params.i_t.offset_array[elmp->orig_count - elmp->curcount]; break; case MPII_DATALOOP_KIND_STRUCT: return dlp->loop_params.s_t.offset_array[elmp->orig_count - elmp->curcount]; break; default: /* --BEGIN ERROR HANDLING-- */ MPIR_Assert(0); break; /* --END ERROR HANDLING-- */ } return -1; }
/* MPII_Dataloop_stackelm_blocksize - returns block size for stackelm based on current * count in stackelm. * * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct * before this is called! * */ MPI_Aint MPII_Dataloop_stackelm_blocksize(struct MPII_Dataloop_stackelm * elmp) { struct MPIR_Dataloop *dlp = elmp->loop_p; switch (dlp->kind & MPII_DATALOOP_KIND_MASK) { case MPII_DATALOOP_KIND_CONTIG: /* NOTE: we're dropping the count into the * blksize field for contigs, as described * in the init call. */ return dlp->loop_params.c_t.count; break; case MPII_DATALOOP_KIND_VECTOR: return dlp->loop_params.v_t.blocksize; break; case MPII_DATALOOP_KIND_BLOCKINDEXED: return dlp->loop_params.bi_t.blocksize; break; case MPII_DATALOOP_KIND_INDEXED: return dlp->loop_params.i_t.blocksize_array[elmp->orig_count - elmp->curcount]; break; case MPII_DATALOOP_KIND_STRUCT: return dlp->loop_params.s_t.blocksize_array[elmp->orig_count - elmp->curcount]; break; default: /* --BEGIN ERROR HANDLING-- */ MPIR_Assert(0); break; /* --END ERROR HANDLING-- */ } return -1; }
/* Dispatch a received packet to the handler registered for its type.
 *
 * The handler table is a function-local static that is filled in by
 * MPIDI_CH3_PktHandler_Init() on the first call.  Returns whatever the
 * selected packet handler returns. */
int MPIDI_CH3U_Handle_ordered_recv_pkt(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt, void *data,
                                       intptr_t *buflen, MPIR_Request ** rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    static MPIDI_CH3_PktHandler_Fcn *pktArray[MPIDI_CH3_PKT_END_CH3+1];
    static int needsInit = 1;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);

    MPL_DBG_STMT(MPIDI_CH3_DBG_OTHER,VERBOSE,MPIDI_DBG_Print_packet(pkt));

    /* FIXME: We can turn this into something like

       MPIR_Assert(pkt->type <= MAX_PACKET_TYPE);
       mpi_errno = MPIDI_CH3_ProgressFunctions[pkt->type](vc,pkt,rreqp);

       in the progress engine itself.  Then this routine is not necessary.
     */

    /* One-time fill of the handler table. */
    if (needsInit) {
        MPIDI_CH3_PktHandler_Init( pktArray, MPIDI_CH3_PKT_END_CH3 );
        needsInit = 0;
    }

    /* Packet type is an enum and hence >= 0 */
    MPIR_Assert(pkt->type <= MPIDI_CH3_PKT_END_CH3);
    mpi_errno = pktArray[pkt->type](vc, pkt, data, buflen, rreqp);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
    return mpi_errno;
}
static MPIR_Request * create_request(void * hdr, intptr_t hdr_sz, size_t nb) { MPIR_Request * sreq; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CREATE_REQUEST); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CREATE_REQUEST); sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED); /* --BEGIN ERROR HANDLING-- */ if (sreq == NULL) return NULL; /* --END ERROR HANDLING-- */ MPIR_Object_set_ref(sreq, 2); sreq->kind = MPIR_REQUEST_KIND__SEND; MPIR_Assert(hdr_sz == sizeof(MPIDI_CH3_Pkt_t)); sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr; sreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)((char *) &sreq->dev.pending_pkt + nb); sreq->dev.iov[0].MPL_IOV_LEN = hdr_sz - nb; sreq->dev.iov_count = 1; sreq->dev.OnDataAvail = 0; MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CREATE_REQUEST); return sreq; }
/* Attach topology information to a communicator as an attribute.
 *
 * On first use this creates the keyval under which topologies are stored and
 * registers MPIR_Topology_finalize as a finalize callback.  The topology
 * pointer is then stored on comm_ptr under that keyval.
 * Returns MPI_SUCCESS or the error from keyval creation / attribute set. */
int MPIR_Topology_put( MPIR_Comm *comm_ptr, MPIR_Topology *topo_ptr )
{
    int mpi_errno = MPI_SUCCESS;

    MPIR_Assert(comm_ptr != NULL);

    if (MPIR_Topology_keyval == MPI_KEYVAL_INVALID) {
        /* Create a new keyval */
        /* FIXME - thread safe code needs a thread lock here, followed
           by another test on the keyval to see if a different thread
           got there first */
        mpi_errno = MPIR_Comm_create_keyval_impl( MPIR_Topology_copy_fn,
                                                  MPIR_Topology_delete_fn,
                                                  &MPIR_Topology_keyval, 0 );
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }

        /* Register the finalize handler */
        MPIR_Add_finalize( MPIR_Topology_finalize, (void*)0,
                           MPIR_FINALIZE_CALLBACK_PRIO-1);
    }

    mpi_errno = MPIR_Comm_set_attr_impl(comm_ptr, MPIR_Topology_keyval,
                                        topo_ptr, MPIR_ATTR_PTR);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* Free a pvar handle.
 *
 * The handle is removed from its session's handle list.  If it refers to a
 * watermark pvar it is also detached from the watermark: the "first" handle
 * clears the watermark's first_used/first_started flags, any other handle is
 * unlinked from the watermark's handle list (linked through next2/prev2).
 * The handle memory is then released and *handle is set to
 * MPI_T_PVAR_HANDLE_NULL.  Always returns MPI_SUCCESS. */
int MPIR_T_pvar_handle_free_impl(MPI_T_pvar_session session, MPI_T_pvar_handle *handle)
{
    MPIR_T_pvar_handle_t *hnd = *handle;

    DL_DELETE(session->hlist, hnd);

    /* Unlink handle from pvar if it is a watermark */
    if (MPIR_T_pvar_is_watermark(hnd)) {
        MPIR_T_pvar_watermark_t *mark = (MPIR_T_pvar_watermark_t *)hnd->addr;

        if (!MPIR_T_pvar_is_first(hnd)) {
            MPIR_Assert(mark->hlist);
            if (mark->hlist != hnd) {
                /* interior or tail element: bypass it in both directions */
                hnd->prev2->next2 = hnd->next2;
                if (hnd->next2 != NULL) {
                    hnd->next2->prev2 = hnd->prev2;
                }
            } else {
                /* hnd happens to be the head */
                mark->hlist = hnd->next2;
                if (mark->hlist != NULL) {
                    /* the new head's prev2 is set to the head itself */
                    mark->hlist->prev2 = mark->hlist;
                }
            }
        } else {
            mark->first_used = 0;
            mark->first_started = 0;
        }
    }

    MPL_free(hnd);
    *handle = MPI_T_PVAR_HANDLE_NULL;

    return MPI_SUCCESS;
}
/* Allocate a memory-region key from the global bitmask allocator.
 *
 * The bitmask is an array of 64-bit words in which a set bit marks a free
 * key (new words are filled with 0xFF bytes, and a key's bit is cleared when
 * it is handed out).  The scan starts at the word where the last allocation
 * succeeded.  When the last word is reached without finding a free key, the
 * array is grown by chunk_size words and the scan carries on into them.
 *
 * Returns the key, computed as (word index * 64 + bit position). */
uint64_t MPIDI_OFI_mr_key_alloc()
{
    uint64_t i;

    for (i = mr_key_allocator.last_free_mr_key; i < mr_key_allocator.num_ints; i++) {
        if (mr_key_allocator.bitmask[i] != 0) {
            uint64_t val = mr_key_allocator.bitmask[i];
            uint64_t nval = 2;

            /* Locate a set bit in this word; nval - 1 ends up as its bit
             * index.  (Presumably each step narrows the search to one half
             * of the remaining bits -- see MPIDI_OFI_INDEX_CALC.) */
            MPIDI_OFI_INDEX_CALC(val, nval, 32, 0xFFFFFFFFULL);
            MPIDI_OFI_INDEX_CALC(val, nval, 16, 0xFFFFULL);
            MPIDI_OFI_INDEX_CALC(val, nval, 8, 0xFFULL);
            MPIDI_OFI_INDEX_CALC(val, nval, 4, 0xFULL);
            MPIDI_OFI_INDEX_CALC(val, nval, 2, 0x3ULL);
            nval -= val & 0x1ULL;

            /* Mark the key as taken and remember where to resume. */
            mr_key_allocator.bitmask[i] &= ~(0x1ULL << (nval - 1));
            mr_key_allocator.last_free_mr_key = i;

            return i * sizeof(uint64_t) * 8 + (nval - 1);
        }

        if (i == mr_key_allocator.num_ints - 1) {
            /* Ran out of words: append chunk_size words, all keys free. */
            mr_key_allocator.num_ints += mr_key_allocator.chunk_size;
            mr_key_allocator.bitmask = MPL_realloc(mr_key_allocator.bitmask,
                                                   sizeof(uint64_t) * mr_key_allocator.num_ints,
                                                   MPL_MEM_RMA);
            MPIR_Assert(mr_key_allocator.bitmask);
            memset(&mr_key_allocator.bitmask[i + 1], 0xFF,
                   sizeof(uint64_t) * mr_key_allocator.chunk_size);
        }
    }

    return -1;
}
/* Create a send request that tracks a partially sent iov.
 *
 * IN: iov, the caller's iov array
 * IN: iov_count, number of entries in iov
 * IN: iov_offset, index of the first entry that has not been fully sent
 * IN: nb, number of bytes of entry iov_offset that have already been sent
 *
 * All iov entries are copied into the request.  When iov_offset is 0 the
 * first entry is the packet header (asserted to be sizeof(MPIDI_CH3_Pkt_t)
 * bytes): the header is copied into the request and entry 0 is redirected to
 * that copy.  Entry iov_offset is then advanced by nb bytes.  The request
 * starts with a reference count of 2 and no OnDataAvail callback.
 *
 * Returns the new request, or NULL if the request could not be created.
 * The function-exit trace macro runs on both paths. */
static MPIR_Request *create_request(MPL_IOV * iov, int iov_count, int iov_offset, size_t nb)
{
    MPIR_Request *sreq;
    int i;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CREATE_REQUEST);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CREATE_REQUEST);

    sreq = MPIR_Request_create(MPIR_REQUEST_KIND__SEND);
    /* --BEGIN ERROR HANDLING-- */
    if (sreq == NULL) {
        /* leave through the common exit so ENTER/EXIT stay paired */
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    MPIR_Object_set_ref(sreq, 2);

    for (i = 0; i < iov_count; i++) {
        sreq->dev.iov[i] = iov[i];
    }

    if (iov_offset == 0) {
        /* The header has not been fully sent: keep a private copy of it in
         * the request and point entry 0 at that copy. */
        MPIR_Assert(iov[0].MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) iov[0].MPL_IOV_BUF;
        sreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) & sreq->dev.pending_pkt;
    }

    /* Skip the nb bytes of the current entry that were already sent. */
    sreq->dev.iov[iov_offset].MPL_IOV_BUF =
        (MPL_IOV_BUF_CAST) ((char *) sreq->dev.iov[iov_offset].MPL_IOV_BUF + nb);
    sreq->dev.iov[iov_offset].MPL_IOV_LEN -= nb;
    sreq->dev.iov_count = iov_count;
    sreq->dev.OnDataAvail = 0;

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CREATE_REQUEST);
    return sreq;
}