/*
 * Allocate and register the collective-monitoring record for a communicator.
 *
 * The record is keyed in the (lazily created) global comm_data hashtable by
 * the raw bits of the communicator pointer.  Returns NULL only when the
 * record itself cannot be allocated; if only the hashtable allocation fails,
 * the (unregistered) record is still returned.
 */
mca_monitoring_coll_data_t*mca_common_monitoring_coll_new( ompi_communicator_t*comm )
{
    mca_monitoring_coll_data_t *coll_data = OBJ_NEW(mca_monitoring_coll_data_t);
    if( NULL == coll_data ) {
        OPAL_MONITORING_PRINT_ERR("coll: new: data structure cannot be allocated");
        return NULL;
    }

    coll_data->procs     = NULL;
    coll_data->comm_name = NULL;
    coll_data->p_comm    = comm;

    /* Lazily create the global hashtable on first use */
    if( NULL == comm_data ) {
        comm_data = OBJ_NEW(opal_hash_table_t);
        if( NULL == comm_data ) {
            OPAL_MONITORING_PRINT_ERR("coll: new: failed to allocate hashtable");
            return coll_data;
        }
        opal_hash_table_init(comm_data, 2048);
    }

    /* Insert in hashtable, keyed by the communicator pointer's bit pattern */
    uint64_t hash_key = *((uint64_t*)&comm);
    if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(comm_data, hash_key, (void*)coll_data) ) {
        OPAL_MONITORING_PRINT_ERR("coll: new: failed to allocate memory or "
                                  "growing the hash table");
    }

    /* Cache data so the procs can be released without affecting the output */
    mca_common_monitoring_coll_cache(coll_data);
    return coll_data;
}
void mca_oob_tcp_component_set_module(int fd, short args, void *cbdata) { mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata; uint64_t ui64; int rc; orte_oob_base_peer_t *bpr; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:set_module called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pop->peer)); /* make sure the OOB knows that we can reach this peer - we * are in the same event base as the OOB base, so we can * directly access its storage */ memcpy(&ui64, (char*)&pop->peer, sizeof(uint64_t)); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers, ui64, (void**)&bpr) || NULL == bpr) { bpr = OBJ_NEW(orte_oob_base_peer_t); } opal_bitmap_set_bit(&bpr->addressable, mca_oob_tcp_component.super.idx); bpr->component = &mca_oob_tcp_component.super; if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers, ui64, bpr))) { ORTE_ERROR_LOG(rc); } OBJ_RELEASE(pop); }
/*
 * Event callback fired when a message could not be delivered to a peer
 * over the usock transport.  Marks the peer unreachable in the OOB base
 * table and hands the message back to the OOB base for another attempt
 * (possibly via a different component).
 */
void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata)
{
    mca_oob_usock_msg_error_t *pop = (mca_oob_usock_msg_error_t*)cbdata;
    uint64_t ui64;
    int rc;

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s usock:unable to send to peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&pop->hop));

    /* retrieve the peer's name */
    /* (the process name's bit pattern is used directly as the hash key) */
    memcpy(&ui64, (char*)&(pop->hop), sizeof(uint64_t));

    /* mark the OOB's table that we can't reach it any more - for now, we don't
     * worry about shifting to another component. Eventually, we will want to push
     * this decision to the OOB so it can try other components and eventually error out
     */
    /* NOTE(review): storing NULL overwrites any existing orte_oob_base_peer_t
     * pointer without releasing it - presumed leak; confirm table ownership */
    if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
                                                               ui64, NULL))) {
        ORTE_ERROR_LOG(rc);
    }

    /* have the OOB base try to send it again */
    ORTE_OOB_SEND(pop->rmsg);
    OBJ_RELEASE(pop);
}
/*
 * Event callback fired when an established usock connection to a peer is
 * lost.  Marks the peer unreachable in the OOB base table, then activates
 * the appropriate proc-state machine: LIFELINE_LOST if the peer was our
 * lifeline (route_lost failed), COMM_FAILED otherwise.
 */
void mca_oob_usock_component_lost_connection(int fd, short args, void *cbdata)
{
    mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
    uint64_t ui64;
    int rc;

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s usock:lost connection called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&pop->peer->name));

    /* retrieve the peer's name */
    /* (the process name's bit pattern is used directly as the hash key) */
    memcpy(&ui64, (char*)&(pop->peer->name), sizeof(uint64_t));

    /* mark the OOB's table that we can't reach it any more - for now, we don't
     * worry about shifting to another component. Eventually, we will want to push
     * this decision to the OOB so it can try other components and eventually error out
     */
    /* NOTE(review): storing NULL overwrites any existing orte_oob_base_peer_t
     * pointer without releasing it - presumed leak; confirm table ownership */
    if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
                                                               ui64, NULL))) {
        ORTE_ERROR_LOG(rc);
    }

    /* activate the proc state */
    if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer->name)) {
        /* routed layer says this peer was a lifeline - escalate */
        ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_LIFELINE_LOST);
    } else {
        ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_COMM_FAILED);
    }
    OBJ_RELEASE(pop);
}
/*
 * Given an incoming segment, lookup the endpoint that sent it.
 *
 * The hashed RTE process name in the BTL header uniquely identifies the
 * sending process (the MAC/hardware address only identifies the sending
 * server, not the sending RTE process).  A per-module hashtable caches
 * the hash -> endpoint mapping; misses fall back to a slow proc lookup
 * whose result is then cached.  Returns NULL if the sender is unknown.
 *
 * JMS We've experimented with using a handshake before sending any data
 * so that instead of looking up a hash on the btl_header->sender, echo
 * back the ptr to the sender's ompi_proc.  There was limited speedup
 * with this scheme; more investigation is required.
 */
static inline ompi_btl_usnic_endpoint_t *
lookup_sender(ompi_btl_usnic_module_t *module, ompi_btl_usnic_segment_t *seg)
{
    uint64_t sender_hash = seg->us_btl_header->sender;
    ompi_btl_usnic_endpoint_t *endpoint = NULL;

    /* Fast path: cached in the module's senders table */
    if (OPAL_LIKELY(OPAL_SUCCESS ==
                    opal_hash_table_get_value_uint64(&module->senders,
                                                     sender_hash,
                                                     (void**) &endpoint))) {
        return endpoint;
    }

    /* Slow path: resolve through the proc layer and cache the result */
    endpoint = ompi_btl_usnic_proc_lookup_endpoint(module, sender_hash);
    if (NULL != endpoint) {
        opal_hash_table_set_value_uint64(&module->senders, sender_hash, endpoint);
    }

    /* NULL here means the sender was not found at all */
    return endpoint;
}
/* * Event callback when there is data available on the registered * socket to recv. This is called for the listen sockets to accept an * incoming connection, on new sockets trying to complete the software * connection process, and for probes. Data on an established * connection is handled elsewhere. */ static void recv_handler(int sd, short flg, void *cbdata) { mca_oob_tcp_conn_op_t *op = (mca_oob_tcp_conn_op_t*)cbdata; int flags; uint64_t *ui64; mca_oob_tcp_hdr_t hdr; mca_oob_tcp_peer_t *peer; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* get the handshake */ if (ORTE_SUCCESS != mca_oob_tcp_peer_recv_connect_ack(NULL, sd, &hdr)) { goto cleanup; } /* finish processing ident */ if (MCA_OOB_TCP_IDENT == hdr.type) { if (NULL == (peer = mca_oob_tcp_peer_lookup(&hdr.origin))) { /* should never happen */ mca_oob_tcp_peer_close(peer); goto cleanup; } /* set socket up to be non-blocking */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { opal_output(0, "%s mca_oob_tcp_recv_connect: fcntl(F_GETFL) failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } else { flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { opal_output(0, "%s mca_oob_tcp_recv_connect: fcntl(F_SETFL) failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } } /* is the peer instance willing to accept this connection */ peer->sd = sd; if (mca_oob_tcp_peer_accept(peer) == false) { if (OOB_TCP_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) { opal_output(0, "%s-%s mca_oob_tcp_recv_connect: " "rejected connection from %s connection state %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), ORTE_NAME_PRINT(&(hdr.origin)), peer->state); } CLOSE_THE_SOCKET(sd); ui64 = (uint64_t*)(&peer->name); opal_hash_table_set_value_uint64(&mca_oob_tcp_module.peers, 
(*ui64), NULL); OBJ_RELEASE(peer); } } cleanup: OBJ_RELEASE(op); }
/*
 * Create (or return the cached) SCTP proc structure for the given
 * ompi_proc.  The structure is cached in the component's sctp_procs
 * hashtable, keyed by the hashed process name, so every BTL module in
 * this process shares one instance per remote proc.  Returns NULL on
 * allocation failure or when the peer's modex data is invalid.
 */
mca_btl_sctp_proc_t* mca_btl_sctp_proc_create(ompi_proc_t* ompi_proc)
{
    int rc;
    size_t size;
    mca_btl_sctp_proc_t* btl_proc;
    uint64_t hash = orte_util_hash_name(&ompi_proc->proc_name);

    OPAL_THREAD_LOCK(&mca_btl_sctp_component.sctp_lock);
    rc = opal_hash_table_get_value_uint64(&mca_btl_sctp_component.sctp_procs,
                                          hash, (void**)&btl_proc);
    if(OMPI_SUCCESS == rc) {
        /* already known - return the cached instance */
        OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);
        return btl_proc;
    }

    btl_proc = OBJ_NEW(mca_btl_sctp_proc_t);
    if(NULL == btl_proc) {
        /* BUGFIX: the lock was previously leaked on this path */
        OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);
        return NULL;
    }
    btl_proc->proc_ompi = ompi_proc;
    btl_proc->proc_name = ompi_proc->proc_name;

    /* add to hash table of all proc instance */
    opal_hash_table_set_value_uint64(&mca_btl_sctp_component.sctp_procs,
                                     hash, btl_proc);
    OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);

    /* lookup sctp parameters exported by this proc */
    rc = ompi_modex_recv( &mca_btl_sctp_component.super.btl_version,
                          ompi_proc,
                          (void**)&btl_proc->proc_addrs,
                          &size );
    if(rc != OMPI_SUCCESS) {
        BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if(0 != (size % sizeof(mca_btl_sctp_addr_t))) {
        BTL_ERROR(("mca_base_modex_recv: invalid size %" PRIsize_t "\n", size));
        /* BUGFIX: btl_proc was previously leaked on this path */
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    btl_proc->proc_addr_count = size / sizeof(mca_btl_sctp_addr_t);

    /* allocate space for endpoint array - one for each exported address */
    btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        malloc(btl_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
    if(NULL == btl_proc->proc_endpoints) {
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if(NULL == mca_btl_sctp_component.sctp_local && ompi_proc == ompi_proc_local()) {
        mca_btl_sctp_component.sctp_local = btl_proc;
    }
    return btl_proc;
}
/*
 * Record that a peer is addressable via the usock component.
 *
 * Applications can reach everything through their local daemon, so any
 * peer is accepted.  Daemons/HNPs can only reach their own local procs
 * this way; other peers are deferred to the next OOB component.
 * The uris argument is unused (usock needs no address payload).
 */
static int component_set_addr(orte_process_name_t *peer, char **uris)
{
    orte_proc_t *proc;
    mca_oob_usock_peer_t *pr;
    uint64_t *ui64;

    /* if I am an application, then everything is addressable
     * by me via my daemon */
    if (ORTE_PROC_IS_APP) {
        ui64 = (uint64_t*)peer;
        if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
                                                             (*ui64), (void**)&pr) ||
            NULL == pr) {
            pr = OBJ_NEW(mca_oob_usock_peer_t);
            pr->name = *peer;
            opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), pr);
        }
        /* BUGFIX: return success whether or not the peer was already known.
         * Previously this return sat inside the not-found branch, so an
         * already-known peer fell through to the daemon-only checks below
         * and could wrongly return ORTE_ERR_TAKE_NEXT_OPTION. */
        return ORTE_SUCCESS;
    }

    /* if I am a daemon or HNP, I can only reach my
     * own local procs via this component */
    if (ORTE_PROC_MY_NAME->jobid == peer->jobid) {
        /* another daemon */
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL == (proc = orte_get_proc_object(peer)) ||
        !proc->local_proc) {
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* indicate that this peer is addressable by this component */
    ui64 = (uint64_t*)peer;
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
                                                         (*ui64), (void**)&pr) ||
        NULL == pr) {
        pr = OBJ_NEW(mca_oob_usock_peer_t);
        pr->name = *peer;
        opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), pr);
    }
    return ORTE_SUCCESS;
}
/*
 * Look up (or create and cache) the UD peer record for a process.
 *
 * If the peer is already cached it is returned directly; otherwise a new
 * record is built from the supplied return-address fields (qpn/qkey/lid/
 * port), an ibverbs address handle is created for it, and the record is
 * stored in the component's ud_peers table keyed by the hashed name.
 * Returns NULL on allocation or ibv_create_ah failure.
 */
mca_oob_ud_peer_t *mca_oob_ud_get_peer (struct mca_oob_ud_port_t *port,
                                        orte_process_name_t *name,
                                        uint32_t qpn, uint32_t qkey,
                                        uint16_t lid, uint8_t port_num)
{
    struct ibv_ah_attr ah_attr;
    mca_oob_ud_peer_t *peer;
    int rc;

    rc = mca_oob_ud_peer_lookup (name, &peer);
    if (ORTE_SUCCESS == rc) {
        OPAL_OUTPUT_VERBOSE((20, mca_oob_base_output, "%s oob:ud:peer_from_msg_hdr using "
                             "cached peer", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return peer;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_oob_base_output, "%s oob:ud:peer_from_msg_hdr creating "
                         "peer from return address", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    peer = OBJ_NEW(mca_oob_ud_peer_t);
    if (NULL == peer) {
        return NULL;
    }

    peer->peer_qpn  = qpn;
    peer->peer_qkey = qkey;
    peer->peer_name = *name;
    peer->peer_lid  = lid;
    peer->peer_port = port_num;

    memset (&ah_attr, 0, sizeof (ah_attr));
    ah_attr.dlid     = peer->peer_lid;
    ah_attr.port_num = peer->peer_port;

    peer->peer_ah = ibv_create_ah (port->device->ib_pd, &ah_attr);
    if (NULL == peer->peer_ah) {
        /* BUGFIX: peer is an OPAL object created with OBJ_NEW; releasing it
         * with free() skipped the class destructor.  Use OBJ_RELEASE. */
        OBJ_RELEASE(peer);
        return NULL;
    }

    peer->peer_context = port->device;

    OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_lock);
    opal_hash_table_set_value_uint64(&mca_oob_ud_component.ud_peers,
                                     orte_util_hash_name(name), (void *) peer);
    OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_lock);

    return peer;
}
/* * Record listening address for this peer - the connection * is created on first-send */ static void process_set_peer(int fd, short args, void *cbdata) { mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata; struct sockaddr inaddr; mca_oob_tcp_peer_t *peer; int rc=ORTE_SUCCESS; uint64_t *ui64 = (uint64_t*)(&pop->peer); mca_oob_tcp_addr_t *maddr; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:processing set_peer cmd", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); if (AF_INET != pop->af_family) { opal_output_verbose(20, orte_oob_base_framework.framework_output, "%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); goto cleanup; } if (NULL == (peer = mca_oob_tcp_peer_lookup(&pop->peer))) { peer = OBJ_NEW(mca_oob_tcp_peer_t); peer->name.jobid = pop->peer.jobid; peer->name.vpid = pop->peer.vpid; opal_output_verbose(20, orte_oob_base_framework.framework_output, "%s SET_PEER ADDING PEER %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pop->peer)); if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_tcp_module.peers, (*ui64), peer)) { OBJ_RELEASE(peer); return; } } if ((rc = parse_uri(pop->af_family, pop->net, pop->port, (struct sockaddr*) &inaddr)) != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); goto cleanup; } opal_output_verbose(20, orte_oob_base_framework.framework_output, "%s set_peer: peer %s is listening on net %s port %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pop->peer), (NULL == pop->net) ? "NULL" : pop->net, (NULL == pop->port) ? "NULL" : pop->port); maddr = OBJ_NEW(mca_oob_tcp_addr_t); memcpy(&maddr->addr, &inaddr, sizeof(inaddr)); opal_list_append(&peer->addrs, &maddr->super); cleanup: OBJ_RELEASE(pop); }
/*
 * A peer has identified itself with a (possibly new) process name.
 * If the announced name differs from the one we have recorded, re-key
 * the peer in the component's tcp_peers table under the new name.
 */
static void mca_oob_tcp_msg_ident(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer)
{
    orte_process_name_t announced = msg->msg_hdr.msg_src;

    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
    if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                    &peer->peer_name, &announced)) {
        /* drop the entry under the old name, then re-insert under the new */
        opal_hash_table_remove_value_uint64(&mca_oob_tcp_component.tcp_peers,
                                            orte_util_hash_name(&peer->peer_name));
        peer->peer_name = announced;
        opal_hash_table_set_value_uint64(&mca_oob_tcp_component.tcp_peers,
                                         orte_util_hash_name(&peer->peer_name), peer);
    }
    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
}
/**
 * Find (or lazily create) the proc_data_t container associated with the
 * given opal_identifier_t in the supplied job table.  Returns NULL only
 * when allocation of a new container fails.
 */
static proc_data_t* lookup_opal_proc(opal_hash_table_t *jtable, opal_identifier_t id)
{
    proc_data_t *entry = NULL;

    opal_hash_table_get_value_uint64(jtable, id, (void**)&entry);
    if (NULL != entry) {
        return entry;
    }

    /* The proc clearly exists, so create a data structure for it */
    entry = OBJ_NEW(proc_data_t);
    if (NULL == entry) {
        opal_output(0, "db:hash:lookup_opal_proc: unable to allocate proc_data_t\n");
        return NULL;
    }
    opal_hash_table_set_value_uint64(jtable, id, entry);
    return entry;
}
/**
 * This PML monitors only the processes in the MPI_COMM_WORLD. As OMPI is now lazily
 * adding peers on the first call to add_procs we need to check how many processes
 * are in the MPI_COMM_WORLD to create the storage with the right size.
 *
 * For each incoming proc belonging to our own job, its opal process name
 * (bit pattern used as a uint64_t key) is mapped to its rank in
 * MPI_COMM_WORLD in the translation_ht hashtable.  Finally chains to the
 * selected PML's own add_procs.
 */
int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs,
                                 size_t nprocs)
{
    opal_process_name_t tmp, wp_name;
    size_t i, peer_rank, nprocs_world;
    uint64_t key;

    if(NULL == translation_ht) {
        /* first call: create the name->rank table */
        translation_ht = OBJ_NEW(opal_hash_table_t);
        opal_hash_table_init(translation_ht, 2048);
        /* get my rank in the MPI_COMM_WORLD */
        my_rank = ompi_comm_rank((ompi_communicator_t*)&ompi_mpi_comm_world);
    }

    nprocs_world = ompi_comm_size((ompi_communicator_t*)&ompi_mpi_comm_world);
    /* For all procs in the same MPI_COMM_WORLD we need to add them to the hash table */
    for( i = 0; i < nprocs; i++ ) {

        /* Extract the peer procname from the procs array */
        if( ompi_proc_is_sentinel(procs[i]) ) {
            tmp = ompi_proc_sentinel_to_name((uintptr_t)procs[i]);
        } else {
            tmp = procs[i]->super.proc_name;
        }
        /* only procs from my own job are monitored */
        if( tmp.jobid != ompi_proc_local_proc->super.proc_name.jobid )
            continue;

        /* linear scan of MPI_COMM_WORLD to find this proc's world rank */
        for( peer_rank = 0; peer_rank < nprocs_world; peer_rank++ ) {
            wp_name = ompi_group_get_proc_name(((ompi_communicator_t*)&ompi_mpi_comm_world)->c_remote_group, peer_rank);
            if( 0 != opal_compare_proc( tmp, wp_name) )
                continue;

            /* Find the rank of the peer in MPI_COMM_WORLD */
            key = *((uint64_t*)&tmp);

            /* store the rank (in COMM_WORLD) of the process
               with its name (a uniq opal ID) as key in the hash table*/
            if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(translation_ht,
                                                                 key, (void*)(uintptr_t)peer_rank) ) {
                return OMPI_ERR_OUT_OF_RESOURCE;  /* failed to allocate memory or growing the hash table */
            }
            break;
        }
    }
    return pml_selected_module.pml_add_procs(procs, nprocs);
}
void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) { mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata; uint64_t ui64; orte_oob_base_peer_t *bpr; int rc; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:lost connection called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pop->peer)); /* if we are terminating, or recovery isn't enabled, then don't attempt to reconnect */ if (!orte_enable_recovery || orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) { goto cleanup; } /* Mark that we no longer support this peer */ memcpy(&ui64, (char*)&pop->peer, sizeof(uint64_t)); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers, ui64, (void**)&bpr) || NULL == bpr) { bpr = OBJ_NEW(orte_oob_base_peer_t); } opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx); if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers, ui64, NULL))) { ORTE_ERROR_LOG(rc); } cleanup: /* activate the proc state */ if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) { ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST); } else { ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED); } OBJ_RELEASE(pop); }
/*
 * Build the name->world-rank translation table on the first add_procs
 * call, then chain to the selected PML's add_procs.  Here the index i
 * in the procs array is taken directly as the COMM_WORLD rank.
 */
int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs,
                                 size_t nprocs)
{
    /**
     * Create the monitoring hashtable only for my MPI_COMM_WORLD. We choose
     * to ignore by now all other processes.
     */
    if(NULL == translation_ht) {
        size_t i;
        uint64_t key;
        opal_process_name_t tmp;

        nbprocs = nprocs;

        translation_ht = OBJ_NEW(opal_hash_table_t);
        opal_hash_table_init(translation_ht, 2048);

        for( i = 0; i < nprocs; i++ ) {
            /* rank : ompi_proc_local_proc in procs */
            if( procs[i] == ompi_proc_local_proc) my_rank = i;

            /* Extract the peer procname from the procs array */
            if( ompi_proc_is_sentinel(procs[i]) ) {
                tmp = ompi_proc_sentinel_to_name((uintptr_t)procs[i]);
            } else {
                tmp = procs[i]->super.proc_name;
            }
            /* the name's bit pattern serves as the hash key */
            key = *((uint64_t*)&tmp);

            /* store the rank (in COMM_WORLD) of the process
               with its name (a uniq opal ID) as key  in the hash table*/
            if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(translation_ht,
                                                                 key, (void*)(uintptr_t)i) ) {
                return OMPI_ERR_OUT_OF_RESOURCE;  /* failed to allocate memory or growing the hash table */
            }
        }
    }
    return pml_selected_module.pml_add_procs(procs, nprocs);
}
void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata) { mca_oob_tcp_msg_error_t *mop = (mca_oob_tcp_msg_error_t*)cbdata; uint64_t ui64; int rc; orte_oob_base_peer_t *bpr; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:no route called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&mop->hop)); /* mark that we cannot reach this hop */ memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t)); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers, ui64, (void**)&bpr) || NULL == bpr) { bpr = OBJ_NEW(orte_oob_base_peer_t); } opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx); if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers, ui64, NULL))) { ORTE_ERROR_LOG(rc); } /* report the error back to the OOB and let it try other components * or declare a problem */ if (!orte_finalizing && !orte_abnormal_term_ordered) { /* if this was a lifeline, then alert */ if (ORTE_SUCCESS != orte_routed.route_lost(&mop->hop)) { ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_LIFELINE_LOST); } else { ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED); } } OBJ_RELEASE(mop); }
/** * Find modex_proc_data_t container associated with given * orte_process_name_t. * * The global lock should *NOT* be held when * calling this function. */ static modex_proc_data_t* modex_lookup_orte_proc(const orte_process_name_t *orte_proc) { modex_proc_data_t *proc_data = NULL; OPAL_THREAD_LOCK(&mutex); opal_hash_table_get_value_uint64(modex_data, orte_util_hash_name(orte_proc), (void**)&proc_data); if (NULL == proc_data) { /* The proc clearly exists, so create a modex structure for it */ proc_data = OBJ_NEW(modex_proc_data_t); if (NULL == proc_data) { opal_output(0, "grpcomm_basic_modex_lookup_orte_proc: unable to allocate modex_proc_data_t\n"); OPAL_THREAD_UNLOCK(&mutex); return NULL; } opal_hash_table_set_value_uint64(modex_data, orte_util_hash_name(orte_proc), proc_data); } OPAL_THREAD_UNLOCK(&mutex); return proc_data; }
/*
 * MPI_Type_create_f90_complex: return the predefined complex datatype
 * matching the requested Fortran-90 precision (p) and range (r).
 *
 * The smallest C complex representation that satisfies (p, r) is chosen
 * via the float.h limits; if even long double cannot represent it,
 * MPI_DATATYPE_NULL is selected and an error is raised.  Per MPI-2.0
 * Sect. 10.2.5 the returned handles must compare equal for equal (p, r),
 * so results are cached in ompi_mpi_f90_complex_hashtable keyed by
 * (p << 32 | r).
 */
int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype)
{
    uint64_t key;
    int p_key, r_key;

    OPAL_CR_NOOP_PROGRESS();

    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        /* Note: These functions accept negative integers for the p and r
         * arguments.  This is because for the SELECTED_COMPLEX_KIND,
         * negative numbers are equivalent to zero values.  See section
         * 13.14.95 of the Fortran 95 standard. */
        if ((MPI_UNDEFINED == p && MPI_UNDEFINED == r)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
        }
    }

    /* if the user does not care about p or r set them to 0 so the
     * test associate with them will always succeed.
     */
    p_key = p;
    r_key = r;
    if( MPI_UNDEFINED == p ) p_key = 0;
    if( MPI_UNDEFINED == r ) r_key = 0;

    /**
     * With respect to the MPI standard, MPI-2.0 Sect. 10.2.5, MPI_TYPE_CREATE_F90_xxxx,
     * page 295, line 47 we handle this nicely by caching the values in a hash table.
     * However, as the value of might not always make sense, a little bit of optimization
     * might be a good idea. Therefore, first we try to see if we can handle the value
     * with some kind of default value, and if it's the case then we look into the
     * cache.
     */
    /* pick the narrowest representation able to hold (p, r) */
    if ( (LDBL_DIG < p) || (LDBL_MAX_10_EXP < r) || (-LDBL_MIN_10_EXP < r) ) *newtype = &ompi_mpi_datatype_null.dt;
    else if( (DBL_DIG < p) || (DBL_MAX_10_EXP < r) || (-DBL_MIN_10_EXP < r) ) *newtype = &ompi_mpi_ldblcplex.dt;
    else if( (FLT_DIG < p) || (FLT_MAX_10_EXP < r) || (-FLT_MIN_10_EXP < r) ) *newtype = &ompi_mpi_dblcplex.dt;
    else *newtype = &ompi_mpi_cplex.dt;

    if( *newtype != &ompi_mpi_datatype_null.dt ) {
        ompi_datatype_t* datatype;
        const int* a_i[2];
        int rc;

        /* cache key packs p in the high 32 bits, r in the low 32 bits */
        key = (((uint64_t)p_key) << 32) | ((uint64_t)r_key);
        if( OPAL_SUCCESS == opal_hash_table_get_value_uint64( &ompi_mpi_f90_complex_hashtable,
                                                              key, (void**)newtype ) ) {
            return MPI_SUCCESS;
        }

        /* Create the duplicate type corresponding to selected type, then
         * set the argument to be a COMBINER with the correct value of r
         * and add it to the hash table. */
        if (OMPI_SUCCESS != ompi_datatype_duplicate( *newtype, &datatype)) {
            OMPI_ERRHANDLER_RETURN (MPI_ERR_INTERN, MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME );
        }
        /* Make sure the user is not allowed to free this datatype as specified
         * in the MPI standard.
         */
        datatype->super.flags |= OMPI_DATATYPE_FLAG_PREDEFINED;
        /* Mark the datatype as a special F90 convenience type */
        // Specifically using opal_snprintf() here (instead of
        // snprintf()) so that over-eager compilers do not warn us
        // that we may be truncating the output.  We *know* that the
        // output may be truncated, and that's ok.
        opal_snprintf(datatype->name, sizeof(datatype->name),
                      "COMBINER %s", (*newtype)->name);

        a_i[0] = &p;
        a_i[1] = &r;
        ompi_datatype_set_args( datatype, 2, a_i, 0, NULL, 0, NULL, MPI_COMBINER_F90_COMPLEX );

        rc = opal_hash_table_set_value_uint64( &ompi_mpi_f90_complex_hashtable, key, datatype );
        if (OMPI_SUCCESS != rc) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, rc, FUNC_NAME);
        }
        *newtype = datatype;
        return MPI_SUCCESS;
    }

    return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
}
/*
 * Receive the peers globally unique process identification from a newly
 * connected socket and verify the expected response. If so, move the
 * socket to a connected state.
 *
 * pr may be NULL (unsolicited inbound connection); in that case the peer
 * record is looked up by socket, or created and registered.  If dhdr is
 * non-NULL the received header is copied out and the function returns
 * before completing the connection, leaving the rest to the caller.
 */
int pmix_server_recv_connect_ack(pmix_server_peer_t* pr, int sd,
                                 pmix_server_hdr_t *dhdr)
{
    char *msg;
    char *version;
    int rc;
    opal_sec_cred_t creds;
    pmix_server_peer_t *peer;
    pmix_server_hdr_t hdr;
    orte_process_name_t sender;

    opal_output_verbose(2, pmix_server_output,
                        "%s RECV CONNECT ACK FROM %s ON SOCKET %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == pr) ? "UNKNOWN" : ORTE_NAME_PRINT(&pr->name), sd);

    peer = pr;
    /* ensure all is zero'd */
    memset(&hdr, 0, sizeof(pmix_server_hdr_t));

    if (usock_peer_recv_blocking(peer, sd, &hdr, sizeof(pmix_server_hdr_t))) {
        if (NULL != peer) {
            /* If the peer state is CONNECT_ACK, then we were waiting for
             * the connection to be ack'd */
            if (peer->state != PMIX_SERVER_CONNECT_ACK) {
                /* handshake broke down - abort this connection */
                opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM %s ON SOCKET %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&peer->name), sd);
                peer->state = PMIX_SERVER_FAILED;
                CLOSE_THE_SOCKET(peer->sd);
                return ORTE_ERR_UNREACH;
            }
        }
    } else {
        /* unable to complete the recv */
        opal_output_verbose(2, pmix_server_output,
                            "%s unable to complete recv of connect-ack from %s ON SOCKET %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name), sd);
        return ORTE_ERR_UNREACH;
    }

    /* if the requestor wanted the header returned, then do so now */
    if (NULL != dhdr) {
        *dhdr = hdr;
    }

    if (hdr.type != PMIX_USOCK_IDENT) {
        opal_output(0, "usock_peer_recv_connect_ack: invalid header type: %d\n", hdr.type);
        if (NULL != peer) {
            peer->state = PMIX_SERVER_FAILED;
            CLOSE_THE_SOCKET(peer->sd);
        } else {
            CLOSE_THE_SOCKET(sd);
        }
        return ORTE_ERR_UNREACH;
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack recvd from %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name));

    sender = hdr.id;
    /* if we don't already have it, get the peer */
    if (NULL == peer) {
        peer = pmix_server_peer_lookup(sd);
        if (NULL == peer) {
            /* brand-new peer: create and register it, keyed by socket fd */
            opal_output_verbose(2, pmix_server_output,
                                "%s pmix_server_recv_connect: connection from new peer",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            peer = OBJ_NEW(pmix_server_peer_t);
            peer->name = sender;
            peer->state = PMIX_SERVER_ACCEPTING;
            peer->sd = sd;
            if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(pmix_server_peers, sd, peer)) {
                OBJ_RELEASE(peer);
                CLOSE_THE_SOCKET(sd);
                return ORTE_ERR_UNREACH;
            }
        } else if (PMIX_SERVER_CONNECTED == peer->state ||
                   PMIX_SERVER_CONNECTING == peer->state ||
                   PMIX_SERVER_CONNECT_ACK == peer->state) {
            /* if I already have an established such a connection, then we need
             * to reject this connection */
            opal_output_verbose(2, pmix_server_output,
                                "%s EXISTING CONNECTION WITH %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&sender));
            /* tear down the old connection's events and socket */
            if (peer->recv_ev_active) {
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
            }
            if (peer->send_ev_active) {
                opal_event_del(&peer->send_event);
                peer->send_ev_active = false;
            }
            if (0 < peer->sd) {
                CLOSE_THE_SOCKET(peer->sd);
                peer->sd = -1;
            }
            peer->retries = 0;
        }
    } else {
        /* compare the peers name to the expected value */
        if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->name, &sender)) {
            opal_output(0, "%s usock_peer_recv_connect_ack: "
                        "received unexpected process identifier %s from %s\n",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&sender),
                        ORTE_NAME_PRINT(&(peer->name)));
            peer->state = PMIX_SERVER_FAILED;
            CLOSE_THE_SOCKET(peer->sd);
            return ORTE_ERR_UNREACH;
        }
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack header from %s is okay",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    /* get the authentication and version payload */
    if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
        peer->state = PMIX_SERVER_FAILED;
        CLOSE_THE_SOCKET(peer->sd);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    memset(msg, 0, hdr.nbytes);
    if (!usock_peer_recv_blocking(peer, sd, msg, hdr.nbytes)) {
        /* unable to complete the recv */
        /* NOTE(review): unlike the other failure paths, the peer state is
         * not set to FAILED and the socket is not closed here - confirm
         * this asymmetry is intended */
        opal_output_verbose(2, pmix_server_output,
                            "%s unable to complete recv of connect-ack from %s ON SOCKET %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&peer->name), peer->sd);
        free(msg);
        return ORTE_ERR_UNREACH;
    }

    /* check that this is from a matching version */
    version = (char*)(msg);
    if (0 != strcmp(version, opal_version_string)) {
        opal_output(0, "%s usock_peer_recv_connect_ack: "
                    "received different version from %s: %s instead of %s\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    version, opal_version_string);
        peer->state = PMIX_SERVER_FAILED;
        CLOSE_THE_SOCKET(peer->sd);
        free(msg);
        return ORTE_ERR_UNREACH;
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack version from %s matches ours",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    /* check security token */
    /* credential follows the NUL-terminated version string in the payload */
    creds.credential = (char*)(msg + strlen(version) + 1);
    creds.size = strlen(creds.credential);
    /* NOTE(review): an authentication failure is only logged - the
     * connection is still accepted below; confirm this is intended */
    if (OPAL_SUCCESS != (rc = opal_sec.authenticate(&creds))) {
        ORTE_ERROR_LOG(rc);
    }
    free(msg);

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack %s authenticated",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    /* if the requestor wanted the header returned, then they
     * will complete their processing */
    if (NULL != dhdr) {
        return ORTE_SUCCESS;
    }

    /* connected */
    pmix_server_peer_connected(peer);
    if (2 <= opal_output_get_verbosity(pmix_server_output)) {
        pmix_server_peer_dump(peer, "connected");
    }
    return ORTE_SUCCESS;
}
/*
 * Create (or return the cached) TCP proc structure for the given
 * ompi_proc.  The structure is cached in the component's tcp_procs
 * hashtable, keyed by the hashed process name.  The peer's exported
 * addresses are fetched via the modex and their address-family fields
 * converted from the OMPI wire constants to the OS AF_* constants.
 * Returns NULL on allocation failure or invalid modex data.
 */
mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_create(ompi_proc_t* ompi_proc)
{
    int rc;
    size_t size;
    mca_btl_tcp2_proc_t* btl_proc;
    uint64_t hash = orte_util_hash_name(&ompi_proc->proc_name);

    OPAL_THREAD_LOCK(&mca_btl_tcp2_component.tcp_lock);
    rc = opal_hash_table_get_value_uint64(&mca_btl_tcp2_component.tcp_procs,
                                          hash, (void**)&btl_proc);
    if(OMPI_SUCCESS == rc) {
        /* already known - return the cached instance */
        OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock);
        return btl_proc;
    }

    btl_proc = OBJ_NEW(mca_btl_tcp2_proc_t);
    if(NULL == btl_proc) {
        /* BUGFIX: the lock was previously leaked on this path */
        OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock);
        return NULL;
    }
    btl_proc->proc_ompi = ompi_proc;

    /* add to hash table of all proc instance */
    opal_hash_table_set_value_uint64(&mca_btl_tcp2_component.tcp_procs,
                                     hash, btl_proc);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock);

    /* lookup tcp parameters exported by this proc */
    rc = ompi_modex_recv( &mca_btl_tcp2_component.super.btl_version,
                          ompi_proc,
                          (void**)&btl_proc->proc_addrs,
                          &size );
    if(rc != OMPI_SUCCESS) {
        BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if(0 != (size % sizeof(mca_btl_tcp2_addr_t))) {
        BTL_ERROR(("mca_base_modex_recv: invalid size %lu: btl-size: %lu\n",
                   (unsigned long) size, (unsigned long)sizeof(mca_btl_tcp2_addr_t)));
        /* BUGFIX: btl_proc was previously leaked on this path */
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp2_addr_t);

    /* allocate space for endpoint array - one for each exported address */
    btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        malloc((1 + btl_proc->proc_addr_count) *
               sizeof(mca_btl_base_endpoint_t*));
    if(NULL == btl_proc->proc_endpoints) {
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    if(NULL == mca_btl_tcp2_component.tcp_local && ompi_proc == ompi_proc_local()) {
        mca_btl_tcp2_component.tcp_local = btl_proc;
    }
    {
        /* convert the OMPI addr_family field to OS constants,
         * so we can check for AF_INET (or AF_INET6) and don't have
         * to deal with byte ordering anymore.
         */
        unsigned int i;
        for (i = 0; i < btl_proc->proc_addr_count; i++) {
            if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) {
                btl_proc->proc_addrs[i].addr_family = AF_INET;
            }
#if OPAL_WANT_IPV6
            if (MCA_BTL_TCP_AF_INET6 == btl_proc->proc_addrs[i].addr_family) {
                btl_proc->proc_addrs[i].addr_family = AF_INET6;
            }
#endif
        }
    }
    return btl_proc;
}