/*
 * Given an incoming segment, lookup the endpoint that sent it.
 *
 * The hashed RTE process name carried in the BTL header uniquely
 * identifies the sending process (the MAC/hardware address alone only
 * identifies the sending server -- not the sending RTE process).
 */
static inline ompi_btl_usnic_endpoint_t *
lookup_sender(ompi_btl_usnic_module_t *module, ompi_btl_usnic_segment_t *seg)
{
    ompi_btl_usnic_endpoint_t *endpoint = NULL;
    uint64_t sender_hash = seg->us_btl_header->sender;

    /* JMS We've experimented with using a handshake before sending any
       data so that instead of looking up a hash on the btl_header->sender,
       echo back the ptr to the sender's ompi_proc.  There was limited
       speedup with this scheme; more investigation is required. */
    if (OPAL_LIKELY(OPAL_SUCCESS ==
                    opal_hash_table_get_value_uint64(&module->senders,
                                                     sender_hash,
                                                     (void**) &endpoint))) {
        return endpoint;
    }

    /* The sender wasn't in the hash table, so do a slow lookup and cache
       the result in the hash table for next time */
    endpoint = ompi_btl_usnic_proc_lookup_endpoint(module, sender_hash);
    if (NULL == endpoint) {
        /* Whoa -- not found at all! */
        return NULL;
    }
    opal_hash_table_set_value_uint64(&module->senders, sender_hash, endpoint);
    return endpoint;
}
void mca_oob_tcp_component_set_module(int fd, short args, void *cbdata) { mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata; uint64_t ui64; int rc; orte_oob_base_peer_t *bpr; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:set_module called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pop->peer)); /* make sure the OOB knows that we can reach this peer - we * are in the same event base as the OOB base, so we can * directly access its storage */ memcpy(&ui64, (char*)&pop->peer, sizeof(uint64_t)); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers, ui64, (void**)&bpr) || NULL == bpr) { bpr = OBJ_NEW(orte_oob_base_peer_t); } opal_bitmap_set_bit(&bpr->addressable, mca_oob_tcp_component.super.idx); bpr->component = &mca_oob_tcp_component.super; if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers, ui64, bpr))) { ORTE_ERROR_LOG(rc); } OBJ_RELEASE(pop); }
/*
 * Look for an existing SCTP process instance based on the globally unique
 * process identifier.  Returns NULL if the proc is not known.
 */
mca_btl_sctp_proc_t* mca_btl_sctp_proc_lookup(const ompi_process_name_t *name)
{
    mca_btl_sctp_proc_t *found = NULL;
    uint64_t hash = ompi_rte_hash_name(name);

    OPAL_THREAD_LOCK(&mca_btl_sctp_component.sctp_lock);
    opal_hash_table_get_value_uint64(&mca_btl_sctp_component.sctp_procs,
                                     hash, (void**)&found);
    OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);
    return found;
}
/*
 * Create (or return the existing) SCTP proc instance for the given
 * ompi_proc.  On the first call for a proc this allocates the instance,
 * registers it in the component hash table, and pulls the peer's exported
 * SCTP addresses via the modex.  Returns NULL on failure.
 */
mca_btl_sctp_proc_t* mca_btl_sctp_proc_create(ompi_proc_t* ompi_proc)
{
    int rc;
    size_t size;
    mca_btl_sctp_proc_t* btl_proc;
    uint64_t hash = orte_util_hash_name(&ompi_proc->proc_name);

    OPAL_THREAD_LOCK(&mca_btl_sctp_component.sctp_lock);
    rc = opal_hash_table_get_value_uint64(&mca_btl_sctp_component.sctp_procs,
                                          hash, (void**)&btl_proc);
    if (OMPI_SUCCESS == rc) {
        /* already created - return the cached instance */
        OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);
        return btl_proc;
    }

    btl_proc = OBJ_NEW(mca_btl_sctp_proc_t);
    if (NULL == btl_proc) {
        /* BUGFIX: release the lock on this error path -- previously we
         * returned while still holding sctp_lock, deadlocking every
         * subsequent caller */
        OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);
        return NULL;
    }
    btl_proc->proc_ompi = ompi_proc;
    btl_proc->proc_name = ompi_proc->proc_name;

    /* add to hash table of all proc instance */
    opal_hash_table_set_value_uint64(&mca_btl_sctp_component.sctp_procs,
                                     hash, btl_proc);
    OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);

    /* lookup sctp parameters exported by this proc */
    rc = ompi_modex_recv( &mca_btl_sctp_component.super.btl_version,
                          ompi_proc,
                          (void**)&btl_proc->proc_addrs,
                          &size );
    if (rc != OMPI_SUCCESS) {
        BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if (0 != (size % sizeof(mca_btl_sctp_addr_t))) {
        BTL_ERROR(("mca_base_modex_recv: invalid size %" PRIsize_t "\n", size));
        /* BUGFIX: release btl_proc here too (it was leaked); matches the
         * modex-failure path above.  NOTE(review): the hash table still
         * holds a stale entry in both error paths -- presumably removed by
         * the OBJ destructor; verify. */
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    btl_proc->proc_addr_count = size / sizeof(mca_btl_sctp_addr_t);

    /* allocate space for endpoint array - one for each exported address */
    btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        malloc(btl_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
    if (NULL == btl_proc->proc_endpoints) {
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if (NULL == mca_btl_sctp_component.sctp_local &&
        ompi_proc == ompi_proc_local()) {
        mca_btl_sctp_component.sctp_local = btl_proc;
    }
    return btl_proc;
}
/*
 * Determine whether this (usock) component can address the given peer.
 * Apps can reach everything via their local daemon; daemons/HNPs can only
 * reach their own local procs.  Returns ORTE_SUCCESS when addressable,
 * ORTE_ERR_TAKE_NEXT_OPTION otherwise.  The uris argument is unused by
 * this component.
 */
static int component_set_addr(orte_process_name_t *peer, char **uris)
{
    orte_proc_t *proc;
    mca_oob_usock_peer_t *pr;
    uint64_t *ui64;

    /* if I am an application, then everything is addressable
     * by me via my daemon */
    if (ORTE_PROC_IS_APP) {
        /* create a peer record if we don't already have one */
        ui64 = (uint64_t*)peer;
        if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
                                                             (*ui64), (void**)&pr) ||
            NULL == pr) {
            pr = OBJ_NEW(mca_oob_usock_peer_t);
            pr->name = *peer;
            opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers,
                                             (*ui64), pr);
        }
        /* BUGFIX: return success whether or not the peer was already in
         * the hash table.  Previously the return sat inside the "not
         * found" branch, so a peer we already knew fell through to the
         * daemon-only logic below and could wrongly be reported as
         * ORTE_ERR_TAKE_NEXT_OPTION. */
        return ORTE_SUCCESS;
    }

    /* if I am a daemon or HNP, I can only reach my
     * own local procs via this component */
    if (ORTE_PROC_MY_NAME->jobid == peer->jobid) {
        /* another daemon */
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL == (proc = orte_get_proc_object(peer)) ||
        !proc->local_proc) {
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* indicate that this peer is addressable by this component */
    ui64 = (uint64_t*)peer;
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
                                                         (*ui64), (void**)&pr) ||
        NULL == pr) {
        pr = OBJ_NEW(mca_oob_usock_peer_t);
        pr->name = *peer;
        opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers,
                                         (*ui64), pr);
    }
    return ORTE_SUCCESS;
}
/*
 * Find the TCP peer record for the given process name, or NULL if the
 * peer is unknown to this module.  The process name is used directly as
 * the 64-bit hash key.
 */
mca_oob_tcp_peer_t* mca_oob_tcp_peer_lookup(const orte_process_name_t *name)
{
    uint64_t key;
    mca_oob_tcp_peer_t *found = NULL;

    memcpy(&key, (char*)name, sizeof(uint64_t));
    if (OPAL_SUCCESS == opal_hash_table_get_value_uint64(&mca_oob_tcp_module.peers,
                                                         key, (void**)&found)) {
        return found;
    }
    return NULL;
}
/*
 * Look up the UD peer record for the given process name.  On success the
 * record is stored in *peer and ORTE_SUCCESS is returned; otherwise *peer
 * is NULL and ORTE_ERR_UNREACH is returned.
 */
int mca_oob_ud_peer_lookup (const orte_process_name_t *name,
                            mca_oob_ud_peer_t **peer)
{
    uint64_t key = orte_util_hash_name(name);

    *peer = NULL;
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_ud_component.ud_peers,
                                                         key, (void**)peer)) {
        return ORTE_ERR_UNREACH;
    }
    return ORTE_SUCCESS;
}
/**
 * Find the proc_data_t container associated with the given
 * opal_identifier_t, creating and registering a new one on first sight.
 * Returns NULL only if allocation fails.
 */
static proc_data_t* lookup_opal_proc(opal_hash_table_t *jtable,
                                     opal_identifier_t id)
{
    proc_data_t *pdat = NULL;

    opal_hash_table_get_value_uint64(jtable, id, (void**)&pdat);
    if (NULL != pdat) {
        return pdat;
    }

    /* The proc clearly exists, so create a data structure for it */
    pdat = OBJ_NEW(proc_data_t);
    if (NULL == pdat) {
        opal_output(0, "db:hash:lookup_opal_proc: unable to allocate proc_data_t\n");
        return NULL;
    }
    opal_hash_table_set_value_uint64(jtable, id, pdat);
    return pdat;
}
/*
 * Event callback: a TCP connection to a peer was lost.  Unless we are
 * shutting down (or recovery is disabled), mark the peer unreachable by
 * this component in the OOB base table, then activate the appropriate
 * proc-state machine (lifeline lost vs. ordinary comm failure).
 */
void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
{
    mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata;
    uint64_t ui64;
    orte_oob_base_peer_t *bpr;
    int rc;

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                        orte_oob_base_framework.framework_output,
                        "%s tcp:lost connection called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&pop->peer));

    /* if we are terminating, or recovery isn't enabled, then don't attempt
       to reconnect */
    if (!orte_enable_recovery || orte_orteds_term_ordered ||
        orte_finalizing || orte_abnormal_term_ordered) {
        goto cleanup;
    }

    /* Mark that we no longer support this peer */
    memcpy(&ui64, (char*)&pop->peer, sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        /* NOTE(review): this freshly-allocated bpr has its bit cleared
         * below but is never stored -- the table entry is then set to NULL,
         * so the new object appears to leak; confirm against the OOB base
         * ownership rules before changing. */
        bpr = OBJ_NEW(orte_oob_base_peer_t);
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);
    /* NOTE(review): storing NULL drops the table's reference to any
     * previously-stored bpr without an OBJ_RELEASE -- verify whether the
     * base releases it elsewhere. */
    if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
                                                               ui64, NULL))) {
        ORTE_ERROR_LOG(rc);
    }

 cleanup:
    /* activate the proc state: route_lost failing indicates the peer was
     * our lifeline */
    if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) {
        ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST);
    } else {
        ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED);
    }
    OBJ_RELEASE(pop);
}
/*
 * Event callback: we have no route to the hop required for a message.
 * Mark the hop unreachable by this component in the OOB base table and,
 * unless we are shutting down, activate the appropriate proc-state
 * machine for the hop.
 */
void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata)
{
    mca_oob_tcp_msg_error_t *mop = (mca_oob_tcp_msg_error_t*)cbdata;
    uint64_t ui64;
    int rc;
    orte_oob_base_peer_t *bpr;

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                        orte_oob_base_framework.framework_output,
                        "%s tcp:no route called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&mop->hop));

    /* mark that we cannot reach this hop */
    memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        /* NOTE(review): same pattern as lost_connection -- the new bpr is
         * never stored (NULL is stored below), so it appears to leak;
         * confirm intent before changing. */
        bpr = OBJ_NEW(orte_oob_base_peer_t);
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);
    if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
                                                               ui64, NULL))) {
        ORTE_ERROR_LOG(rc);
    }

    /* report the error back to the OOB and let it try other components
     * or declare a problem */
    if (!orte_finalizing && !orte_abnormal_term_ordered) {
        /* if this was a lifeline, then alert */
        if (ORTE_SUCCESS != orte_routed.route_lost(&mop->hop)) {
            ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_LIFELINE_LOST);
        } else {
            ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        }
    }
    OBJ_RELEASE(mop);
}
/** * Find modex_proc_data_t container associated with given * orte_process_name_t. * * The global lock should *NOT* be held when * calling this function. */ static modex_proc_data_t* modex_lookup_orte_proc(const orte_process_name_t *orte_proc) { modex_proc_data_t *proc_data = NULL; OPAL_THREAD_LOCK(&mutex); opal_hash_table_get_value_uint64(modex_data, orte_util_hash_name(orte_proc), (void**)&proc_data); if (NULL == proc_data) { /* The proc clearly exists, so create a modex structure for it */ proc_data = OBJ_NEW(modex_proc_data_t); if (NULL == proc_data) { opal_output(0, "grpcomm_basic_modex_lookup_orte_proc: unable to allocate modex_proc_data_t\n"); OPAL_THREAD_UNLOCK(&mutex); return NULL; } opal_hash_table_set_value_uint64(modex_data, orte_util_hash_name(orte_proc), proc_data); } OPAL_THREAD_UNLOCK(&mutex); return proc_data; }
/*
 * MPI_Type_create_f90_complex: return a predefined complex datatype
 * matching the requested decimal precision p and decimal exponent range r
 * (SELECTED_COMPLEX_KIND semantics), caching per-(p,r) duplicates in a
 * hash table as required by MPI-2.0 Sect. 10.2.5.
 */
int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype)
{
    uint64_t key;
    int p_key, r_key;

    OPAL_CR_NOOP_PROGRESS();

    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        /* Note: These functions accept negative integers for the p and r
         * arguments.  This is because for the SELECTED_COMPLEX_KIND,
         * negative numbers are equivalent to zero values.  See section
         * 13.14.95 of the Fortran 95 standard. */
        if ((MPI_UNDEFINED == p && MPI_UNDEFINED == r)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
        }
    }

    /* if the user does not care about p or r set them to 0 so the
     * test associated with them will always succeed.
     */
    p_key = p;
    r_key = r;
    if( MPI_UNDEFINED == p ) p_key = 0;
    if( MPI_UNDEFINED == r ) r_key = 0;

    /**
     * With respect to the MPI standard, MPI-2.0 Sect. 10.2.5, MPI_TYPE_CREATE_F90_xxxx,
     * page 295, line 47 we handle this nicely by caching the values in a hash table.
     * However, as the value might not always make sense, a little bit of optimization
     * might be a good idea. Therefore, first we try to see if we can handle the value
     * with some kind of default value, and if it's the case then we look into the
     * cache.
     */
    /* Map (p, r) onto the smallest predefined complex type that can
     * represent it; if even long double cannot, fall through to
     * MPI_DATATYPE_NULL and the MPI_ERR_ARG return below. */
    if ( (LDBL_DIG < p) || (LDBL_MAX_10_EXP < r) || (-LDBL_MIN_10_EXP < r) )
        *newtype = &ompi_mpi_datatype_null.dt;
    else if( (DBL_DIG < p) || (DBL_MAX_10_EXP < r) || (-DBL_MIN_10_EXP < r) )
        *newtype = &ompi_mpi_ldblcplex.dt;
    else if( (FLT_DIG < p) || (FLT_MAX_10_EXP < r) || (-FLT_MIN_10_EXP < r) )
        *newtype = &ompi_mpi_dblcplex.dt;
    else
        *newtype = &ompi_mpi_cplex.dt;

    if( *newtype != &ompi_mpi_datatype_null.dt ) {
        ompi_datatype_t* datatype;
        const int* a_i[2];
        int rc;

        /* pack (p, r) into a single 64-bit cache key */
        key = (((uint64_t)p_key) << 32) | ((uint64_t)r_key);
        if( OPAL_SUCCESS == opal_hash_table_get_value_uint64( &ompi_mpi_f90_complex_hashtable,
                                                              key, (void**)newtype ) ) {
            return MPI_SUCCESS;
        }
        /* Create the duplicate type corresponding to selected type, then
         * set the argument to be a COMBINER with the correct value of r
         * and add it to the hash table. */
        if (OMPI_SUCCESS != ompi_datatype_duplicate( *newtype, &datatype)) {
            OMPI_ERRHANDLER_RETURN (MPI_ERR_INTERN, MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME );
        }
        /* Make sure the user is not allowed to free this datatype as specified
         * in the MPI standard.
         */
        datatype->super.flags |= OMPI_DATATYPE_FLAG_PREDEFINED;

        /* Mark the datatype as a special F90 convenience type */
        // Specifically using opal_snprintf() here (instead of
        // snprintf()) so that over-eager compilers do not warn us
        // that we may be truncating the output.  We *know* that the
        // output may be truncated, and that's ok.
        opal_snprintf(datatype->name, sizeof(datatype->name),
                      "COMBINER %s", (*newtype)->name);

        a_i[0] = &p;
        a_i[1] = &r;
        ompi_datatype_set_args( datatype, 2, a_i, 0, NULL, 0, NULL,
                                MPI_COMBINER_F90_COMPLEX );

        rc = opal_hash_table_set_value_uint64( &ompi_mpi_f90_complex_hashtable,
                                               key, datatype );
        if (OMPI_SUCCESS != rc) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, rc, FUNC_NAME);
        }
        *newtype = datatype;
        return MPI_SUCCESS;
    }

    return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
}
/*
 * Create (or return the existing) TCP2 proc instance for the given
 * ompi_proc.  On first call this allocates the instance, registers it in
 * the component hash table, pulls the peer's exported TCP addresses via
 * the modex, and normalizes the address-family constants to OS values.
 * Returns NULL on failure.
 */
mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_create(ompi_proc_t* ompi_proc)
{
    int rc;
    size_t size;
    mca_btl_tcp2_proc_t* btl_proc;
    uint64_t hash = orte_util_hash_name(&ompi_proc->proc_name);

    OPAL_THREAD_LOCK(&mca_btl_tcp2_component.tcp_lock);
    rc = opal_hash_table_get_value_uint64(&mca_btl_tcp2_component.tcp_procs,
                                          hash, (void**)&btl_proc);
    if (OMPI_SUCCESS == rc) {
        /* already created - return the cached instance */
        OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock);
        return btl_proc;
    }

    btl_proc = OBJ_NEW(mca_btl_tcp2_proc_t);
    if (NULL == btl_proc) {
        /* BUGFIX: release the lock on this error path -- previously we
         * returned while still holding tcp_lock, deadlocking every
         * subsequent caller */
        OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock);
        return NULL;
    }
    btl_proc->proc_ompi = ompi_proc;

    /* add to hash table of all proc instance */
    opal_hash_table_set_value_uint64(&mca_btl_tcp2_component.tcp_procs,
                                     hash, btl_proc);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock);

    /* lookup tcp parameters exported by this proc */
    rc = ompi_modex_recv( &mca_btl_tcp2_component.super.btl_version,
                          ompi_proc,
                          (void**)&btl_proc->proc_addrs,
                          &size );
    if (rc != OMPI_SUCCESS) {
        BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if (0 != (size % sizeof(mca_btl_tcp2_addr_t))) {
        BTL_ERROR(("mca_base_modex_recv: invalid size %lu: btl-size: %lu\n",
                   (unsigned long) size, (unsigned long)sizeof(mca_btl_tcp2_addr_t)));
        /* BUGFIX: release btl_proc here too (it was leaked); matches the
         * modex-failure path above.  NOTE(review): the hash table still
         * holds a stale entry in both error paths -- presumably removed by
         * the OBJ destructor; verify. */
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp2_addr_t);

    /* allocate space for endpoint array - one for each exported address */
    btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        malloc((1 + btl_proc->proc_addr_count) *
               sizeof(mca_btl_base_endpoint_t*));
    if (NULL == btl_proc->proc_endpoints) {
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if (NULL == mca_btl_tcp2_component.tcp_local &&
        ompi_proc == ompi_proc_local()) {
        mca_btl_tcp2_component.tcp_local = btl_proc;
    }

    {
        /* convert the OMPI addr_family field to OS constants,
         * so we can check for AF_INET (or AF_INET6) and don't have
         * to deal with byte ordering anymore.
         */
        unsigned int i;
        for (i = 0; i < btl_proc->proc_addr_count; i++) {
            if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) {
                btl_proc->proc_addrs[i].addr_family = AF_INET;
            }
#if OPAL_WANT_IPV6
            if (MCA_BTL_TCP_AF_INET6 == btl_proc->proc_addrs[i].addr_family) {
                btl_proc->proc_addrs[i].addr_family = AF_INET6;
            }
#endif
        }
    }
    return btl_proc;
}
/*
 * Progress uGNI connection datagrams: probe for a completed datagram,
 * resolve it to an endpoint (directly by id, or via the proc-id hash for
 * the wildcard endpoint), advance the endpoint's connection state machine,
 * drain its SMSG mailbox if connected, and repost the wildcard datagram.
 * Returns the number of SMSG messages processed, 0 if nothing completed,
 * or a negative OPAL error code.
 */
static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint64_t datagram_id, data, proc_id;
    uint32_t remote_addr, remote_id;
    mca_btl_base_endpoint_t *ep;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    gni_return_t grc;
    int count = 0, rc;

    /* check for datagram completion */
    OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        /* common case: nothing pending */
        OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
        return 0;
    }

    /* low bits above the mask carry the endpoint index for directed ids */
    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id,
                 (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        /* directed datagram: the id encodes the endpoint's array index */
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
        handle = ep->smsg_ep_handle;
    } else {
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return opal_common_rc_ugni_to_opal (grc);
    }

    /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
    if (handle == ugni_module->wildcard_ep) {
        proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);
        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
                     proc_id));
        OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
                                               proc_id, (void **) &ep);
        OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
        /* check if the endpoint is known */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
            /* unknown peer: resolve its opal_proc and create the endpoint */
            struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
            BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
                         ugni_module->wc_remote_attr.proc_name.jobid,
                         ugni_module->wc_remote_attr.proc_name.vpid));
            ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
            if (OPAL_UNLIKELY(NULL == ep)) {
                /* NOTE(review): rc may be OPAL_SUCCESS here (lookup
                 * succeeded but ep was NULL), in which case this returns
                 * success despite the failure -- confirm intent. */
                return rc;
            }
        }
    } else {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id,
                 post_state, data, (void *) ep, remote_id));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&ep->lock);
    if (handle != ugni_module->wildcard_ep) {
        /* directed post complete */
        ep->dg_posted = false;
    }
    (void) mca_btl_ugni_ep_connect_progress (ep);
    opal_mutex_unlock (&ep->lock);

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /* process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}
/*
 * Event callback: a message needs routing but this component does not
 * know the required hop.  Mark both the hop and the final destination as
 * unreachable by this component, then hand the message back to the OOB
 * base so another component can try to deliver it.  If the OOB base has
 * no record of the hop/destination at all, declare comm failure.
 */
void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
{
    mca_oob_tcp_msg_error_t *mop = (mca_oob_tcp_msg_error_t*)cbdata;
    uint64_t ui64;
    orte_rml_send_t *snd;
    orte_oob_base_peer_t *bpr;

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                        orte_oob_base_framework.framework_output,
                        "%s tcp:unknown hop called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&mop->hop));

    if (orte_finalizing || orte_abnormal_term_ordered) {
        /* just ignore the problem */
        OBJ_RELEASE(mop);
        return;
    }

    /* mark that this component cannot reach this hop */
    memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        /* the overall OOB has no knowledge of this hop. Only
         * way this could happen is if the peer contacted us
         * via this component, and it wasn't entered into the
         * OOB framework hash table. We have no way of knowing
         * what to do next, so just output an error message and
         * abort */
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of the reqd hop %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst),
                    ORTE_NAME_PRINT(&mop->hop));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* mark that this component cannot reach this destination either */
    memcpy(&ui64, (char*)&(mop->snd->hdr.dst), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of this process",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* post the message to the OOB so it can see
     * if another component can transfer it
     */
    MCA_OOB_TCP_HDR_NTOH(&mop->snd->hdr);
    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = mop->snd->hdr.dst;
    snd->origin = mop->snd->hdr.origin;
    snd->tag = mop->snd->hdr.tag;
    /* ownership of the data buffer transfers to snd */
    snd->data = mop->snd->data;
    snd->count = mop->snd->hdr.nbytes;
    snd->cbfunc.iov = NULL;
    snd->cbdata = NULL;
    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);
    /* protect the data - NULL it out so releasing mop doesn't free the
     * buffer we just handed to snd */
    mop->snd->data = NULL;
    OBJ_RELEASE(mop);
}
/*
 * Progress uGNI connection datagrams (unlocked variant): probe for a
 * completed datagram, resolve it to an endpoint (directly by id, or via
 * the proc-id hash for the wildcard endpoint), advance the endpoint's
 * connection state machine, drain its SMSG mailbox if connected, and
 * repost the wildcard datagram.  Returns the number of SMSG messages
 * processed, 0 if nothing completed, or an OMPI error code.
 */
static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint32_t remote_addr, remote_id;
    uint64_t datagram_id, data;
    mca_btl_base_endpoint_t *ep;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    gni_return_t grc;
    int count = 0, rc;

    /* check for datagram completion */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        /* common case: nothing pending */
        return 0;
    }

    /* low bits above the mask carry the endpoint index for directed ids */
    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id,
                 (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        /* directed datagram: the id encodes the endpoint's array index */
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
        handle = ep->smsg_ep_handle;
    } else {
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
    if (handle == ugni_module->wildcard_ep) {
        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
                     ugni_module->wc_remote_attr.proc_id));
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
                                               ugni_module->wc_remote_attr.proc_id,
                                               (void *) &ep);
        /* check if the endpoint is known */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
            /* unlike the newer variant of this function, an unknown peer
             * is a hard error here */
            BTL_ERROR(("received connection attempt from an unknown peer. rc: %d, ep: %p, id: 0x%" PRIx64,
                       rc, ep, ugni_module->wc_remote_attr.proc_id));
            return OMPI_ERR_NOT_FOUND;
        }
    } else {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", ep));
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id,
                 post_state, data, ep, remote_id));

    /* NTH: TODO -- error handling */
    (void) mca_btl_ugni_ep_connect_progress (ep);

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /* process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}
/*
 * libevent read callback for a peer socket.  Drives the per-peer receive
 * state machine:
 *   - CONNECT_ACK: finish the connection handshake, then arm send/recv
 *     events and mark the peer CONNECTED;
 *   - CONNECTED: incrementally read the message header, then the payload,
 *     and finally either deliver the message to the RML (if we are the
 *     destination) or relay it toward the next routing hop.
 * Partial reads (EWOULDBLOCK/BUSY) simply return and wait for the next
 * event; hard errors close the connection or force termination.
 */
void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
{
    mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata;
    int rc;
    orte_process_name_t hop;
    mca_oob_tcp_peer_t *relay;
    uint64_t ui64;

    /* ignore late events during abnormal termination */
    if (orte_abnormal_term_ordered) {
        return;
    }

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                        orte_oob_base_framework.framework_output,
                        "%s:tcp:recv:handler called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    switch (peer->state) {
    case MCA_OOB_TCP_CONNECT_ACK:
        if (ORTE_SUCCESS == (rc = mca_oob_tcp_peer_recv_connect_ack(peer, peer->sd, NULL))) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler starting send/recv events",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            /* we connected! Start the send/recv events */
            if (!peer->recv_ev_active) {
                opal_event_add(&peer->recv_event, 0);
                peer->recv_ev_active = true;
            }
            /* the connect-timeout timer is no longer needed */
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            /* if there is a message waiting to be sent, queue it */
            if (NULL == peer->send_msg) {
                peer->send_msg = (mca_oob_tcp_send_t*)opal_list_remove_first(&peer->send_queue);
            }
            if (NULL != peer->send_msg && !peer->send_ev_active) {
                opal_event_add(&peer->send_event, 0);
                peer->send_ev_active = true;
            }
            /* update our state */
            peer->state = MCA_OOB_TCP_CONNECTED;
        } else {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                orte_oob_base_framework.framework_output,
                                "%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&peer->name));
            opal_event_del(&peer->recv_event);
            /* handshake failure is fatal */
            ORTE_FORCED_TERMINATE(1);
            return;
        }
        break;

    case MCA_OOB_TCP_CONNECTED:
        opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                            orte_oob_base_framework.framework_output,
                            "%s:tcp:recv:handler CONNECTED",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* allocate a new message and setup for recv */
        if (NULL == peer->recv_msg) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler allocate new recv msg",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            peer->recv_msg = OBJ_NEW(mca_oob_tcp_recv_t);
            if (NULL == peer->recv_msg) {
                opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                return;
            }
            /* start by reading the header */
            peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
            peer->recv_msg->rdbytes = sizeof(mca_oob_tcp_hdr_t);
        }
        /* if the header hasn't been completely read, read it */
        if (!peer->recv_msg->hdr_recvd) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler read hdr",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                OPAL_TIMING_EVENT((&tm,"Header received from %s", ORTE_NAME_PRINT(&peer->name)));
                /* completed reading the header */
                peer->recv_msg->hdr_recvd = true;
                /* convert the header */
                MCA_OOB_TCP_HDR_NTOH(&peer->recv_msg->hdr);
                /* if this is a zero-byte message, then we are done */
                if (0 == peer->recv_msg->hdr.nbytes) {
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                        orte_oob_base_framework.framework_output,
                                        "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&peer->name),
                                        peer->recv_msg->hdr.tag);
                    peer->recv_msg->data = NULL;  // make sure
                } else {
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                        orte_oob_base_framework.framework_output,
                                        "%s:tcp:recv:handler allocate data region of size %lu",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        (unsigned long)peer->recv_msg->hdr.nbytes);
                    /* allocate the data region */
                    peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
                    /* point to it */
                    peer->recv_msg->rdptr = peer->recv_msg->data;
                    peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
                }
                /* fall thru and attempt to read the data */
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                /* close the connection */
                opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                    orte_oob_base_framework.framework_output,
                                    "%s:tcp:recv:handler error reading bytes - closing connection",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                mca_oob_tcp_peer_close(peer);
                return;
            }
        }

        if (peer->recv_msg->hdr_recvd) {
            /* continue to read the data block - we start from
             * wherever we left off, which could be at the
             * beginning or somewhere in the message
             */
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                OPAL_TIMING_EVENT((&tm,"Msg received from %s", ORTE_NAME_PRINT(&peer->name)));
                /* we recvd all of the message */
                opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                    orte_oob_base_framework.framework_output,
                                    "%s RECVD COMPLETE MESSAGE FROM %s (ORIGIN %s) OF %d BYTES FOR DEST %s TAG %d",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    ORTE_NAME_PRINT(&peer->name),
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
                                    (int)peer->recv_msg->hdr.nbytes,
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
                                    peer->recv_msg->hdr.tag);
                /* am I the intended recipient (header was already converted
                 * back to host order)? */
                if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
                    peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* yes - post it to the RML for delivery */
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                        orte_oob_base_framework.framework_output,
                                        "%s DELIVERING TO RML",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin,
                                          peer->recv_msg->hdr.tag,
                                          peer->recv_msg->data,
                                          peer->recv_msg->hdr.nbytes);
                    OBJ_RELEASE(peer->recv_msg);
                } else {
                    /* no - find the next hop in the route */
                    hop = orte_routed.get_route(&peer->recv_msg->hdr.dst);
                    if (hop.jobid == ORTE_JOBID_INVALID ||
                        hop.vpid == ORTE_VPID_INVALID) {
                        /* no hop known - post the error to the component
                         * and let the OOB see if there is another way
                         * to get there from here */
                        opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                            orte_oob_base_framework.framework_output,
                                            "%s NO ROUTE TO %s FROM HERE",
                                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                            ORTE_NAME_PRINT(&peer->name));
                        /* let the component know about the problem */
                        ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_no_route);
                        /* cleanup */
                        OBJ_RELEASE(peer->recv_msg);
                        return;
                    } else {
                        /* do we know how to reach the next hop? */
                        memcpy(&ui64, (char*)&hop, sizeof(uint64_t));
                        if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_module.peers,
                                                                             ui64, (void**)&relay)) {
                            opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                                orte_oob_base_framework.framework_output,
                                                "%s ADDRESS OF NEXT HOP %s TO %s IS UNKNOWN",
                                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                ORTE_NAME_PRINT(&hop),
                                                ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst));
                            /* let the component know about the problem */
                            ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_hop_unknown);
                            /* cleanup */
                            OBJ_RELEASE(peer->recv_msg);
                            return;
                        }
                        opal_output_verbose(OOB_TCP_DEBUG_CONNECT,
                                            orte_oob_base_framework.framework_output,
                                            "%s ROUTING TO %s FROM HERE",
                                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                            ORTE_NAME_PRINT(&relay->name));
                        /* if this came from a different job family, then ensure
                         * we know how to return */
                        if (ORTE_JOB_FAMILY(peer->recv_msg->hdr.origin.jobid) !=
                            ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
                            orte_routed.update_route(&(peer->recv_msg->hdr.origin), &peer->name);
                        }
                        /* post the message for retransmission */
                        MCA_OOB_TCP_QUEUE_RELAY(peer->recv_msg, relay);
                        OBJ_RELEASE(peer->recv_msg);
                    }
                }
                peer->recv_msg = NULL;
                return;
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                // report the error
                opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to recv message",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                /* turn off the recv event */
                opal_event_del(&peer->recv_event);
                ORTE_FORCED_TERMINATE(1);
                return;
            }
        }
        break;

    default:
        opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    peer->state);
        // mca_oob_tcp_peer_close(peer);
        break;
    }
}