/*
 * Close the TCP BTL component.
 *
 * Frees the interface include/exclude/sequence strings and the BTL module
 * array, removes the listen-socket event(s) and closes the listen socket(s),
 * deletes and releases every entry still on the tcp_events list, and finally
 * destructs the objects embedded in the component structure.
 *
 * Always returns OMPI_SUCCESS.
 */
static int mca_btl_tcp_component_close(void)
{
    opal_list_item_t* item;
    opal_list_item_t* next;

    /* free(NULL) is a no-op, so no guards are needed.  Every pointer is
     * reset after being freed so the component never keeps a dangling
     * pointer around (previously tcp_if_seq and tcp_btls were left stale). */
    free(mca_btl_tcp_component.tcp_if_include);
    mca_btl_tcp_component.tcp_if_include = NULL;

    free(mca_btl_tcp_component.tcp_if_exclude);
    mca_btl_tcp_component.tcp_if_exclude = NULL;

    free(mca_btl_tcp_component.tcp_if_seq);
    mca_btl_tcp_component.tcp_if_seq = NULL;

    free(mca_btl_tcp_component.tcp_btls);
    mca_btl_tcp_component.tcp_btls = NULL;

    /* stop listening: drop the event before closing its descriptor */
    if (mca_btl_tcp_component.tcp_listen_sd >= 0) {
        opal_event_del(&mca_btl_tcp_component.tcp_recv_event);
        CLOSE_THE_SOCKET(mca_btl_tcp_component.tcp_listen_sd);
        mca_btl_tcp_component.tcp_listen_sd = -1;
    }
#if OPAL_WANT_IPV6
    if (mca_btl_tcp_component.tcp6_listen_sd >= 0) {
        opal_event_del(&mca_btl_tcp_component.tcp6_recv_event);
        CLOSE_THE_SOCKET(mca_btl_tcp_component.tcp6_listen_sd);
        mca_btl_tcp_component.tcp6_listen_sd = -1;
    }
#endif

    /* cleanup any pending events */
    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    for(item =  opal_list_get_first(&mca_btl_tcp_component.tcp_events);
        item != opal_list_get_end(&mca_btl_tcp_component.tcp_events);
        item = next) {
        mca_btl_tcp_event_t* event = (mca_btl_tcp_event_t*)item;
        /* grab the successor before the current item is released */
        next = opal_list_get_next(item);
        opal_event_del(&event->event);
        OBJ_RELEASE(event);
    }
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);

    /* release resources */
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_procs);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_events);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_eager);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_max);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_user);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_lock);

#ifdef __WINDOWS__
    WSACleanup();
#endif

    return OMPI_SUCCESS;
}
/* Used only with one-to-one socket.
 *
 * Event callback when there is data available on the registered 
 * socket to recv.
 */
static void mca_btl_sctp_component_recv_handler(int sd, short flags, void* user)
{
    orte_process_name_t guid;
    struct sockaddr_in addr;
    int retval;
    mca_btl_sctp_proc_t* btl_proc;
    opal_socklen_t addr_len = sizeof(addr);
    mca_btl_sctp_event_t *event = (mca_btl_sctp_event_t *)user;
    int msg_flags=0;
    struct sctp_sndrcvinfo sri;
    
    /* accept new connections on the listen socket */
    if(mca_btl_sctp_component.sctp_listen_sd == sd) {
        mca_btl_sctp_component_accept();
        return;
    }
    OBJ_RELEASE(event);

    retval = sctp_recvmsg(sd, (char *)&guid, sizeof(guid), 0, 0, &sri, &msg_flags);

    if(retval != sizeof(guid)) {
        CLOSE_THE_SOCKET(sd); 
        return; 
    }
    SCTP_BTL_ERROR(("mca_btl_sctp_component_recv_handler() sd=%d, got %d byte guid.\n", sd, retval));

    ORTE_PROCESS_NAME_NTOH(guid);
   
    /* lookup the corresponding process */
    btl_proc = mca_btl_sctp_proc_lookup(&guid);
    if(NULL == btl_proc) {
        BTL_ERROR(("errno=%d",errno));
        CLOSE_THE_SOCKET(sd);
        return;
    }

    /* lookup peer address */
    if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) {
        if(opal_socket_errno != ECONNRESET && opal_socket_errno != EBADF && opal_socket_errno != ENOTCONN) {
            BTL_ERROR(("getpeername() failed with errno=%d", opal_socket_errno));
        }
        CLOSE_THE_SOCKET(sd);
        return;
    }

    /* are there any existing peer instances will to accept this connection */
    if(mca_btl_sctp_proc_accept(btl_proc, &addr, sd) == false) {
        CLOSE_THE_SOCKET(sd);
        return;
    }
}
示例#3
0
/*
 * Close the PTL framework.
 *
 * Does nothing (and returns PMIX_SUCCESS) unless the framework is marked
 * initialized.  Otherwise it clears the flag, stops the listener, closes the
 * client's server socket if one is open, destructs the framework's global
 * lists and returns the result of closing the framework's components.
 */
static pmix_status_t pmix_ptl_close(void)
{
    if (!pmix_ptl_globals.initialized) {
        return PMIX_SUCCESS;
    }
    pmix_ptl_globals.initialized = false;

    /* make sure nothing is listening any more before tearing things down */
    pmix_ptl_base_stop_listening();

    /* drop the connection to our server, if we have one */
    if (NULL != pmix_client_globals.myserver &&
        pmix_client_globals.myserver->sd >= 0) {
        CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd);
        pmix_client_globals.myserver->sd = -1;
    }

    /* component-owned state is cleaned up when the components are closed */
    PMIX_LIST_DESTRUCT(&pmix_ptl_globals.actives);
    PMIX_LIST_DESTRUCT(&pmix_ptl_globals.posted_recvs);
    PMIX_LIST_DESTRUCT(&pmix_ptl_globals.unexpected_msgs);
    PMIX_LIST_DESTRUCT(&pmix_ptl_globals.listeners);

    return pmix_mca_base_framework_components_close(&pmix_ptl_base_framework, NULL);
}
/*
 * Destructor for a usock peer: closes the peer's socket when it holds a
 * valid descriptor and destructs its send queue.
 */
static void peer_des(mca_oob_usock_peer_t *peer)
{
    if (peer->sd >= 0) {
        CLOSE_THE_SOCKET(peer->sd);
    }

    OPAL_LIST_DESTRUCT(&peer->send_queue);
}
/*
 * A blocking send on a non-blocking socket. Used to send the small amount of
 * connection information that identifies the peer's endpoint.
 *
 * Loops until all `size` bytes of `data` have been written to `sd`.  A send
 * that fails with EINTR, EAGAIN or EWOULDBLOCK is simply retried.  Any other
 * error is logged, marks the peer PMIX_SERVER_FAILED, closes the peer's
 * socket and ends the call.
 *
 * Returns ORTE_SUCCESS once everything has been sent, ORTE_ERR_UNREACH on a
 * hard send error.
 */
static int usock_peer_send_blocking(pmix_server_peer_t* peer,
                                    int sd, void* data, size_t size)
{
    unsigned char* ptr = (unsigned char*)data;
    size_t cnt = 0;
    int retval;

    opal_output_verbose(2, pmix_server_output,
                        "%s send blocking of %"PRIsize_t" bytes to socket %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        size, sd);

    while (cnt < size) {
        retval = send(sd, (char*)ptr+cnt, size-cnt, 0);
        if (retval < 0) {
            if (opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
                opal_output(0, "%s usock_peer_send_blocking: send() to socket %d failed: %s (%d)\n",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd,
                            strerror(opal_socket_errno),
                            opal_socket_errno);
                peer->state = PMIX_SERVER_FAILED;
                /* NOTE(review): the data is sent on `sd` but the socket that
                 * gets closed is peer->sd - presumably callers pass
                 * peer->sd as `sd`; confirm. */
                CLOSE_THE_SOCKET(peer->sd);
                /* do not leave a closed descriptor stored in the peer:
                 * anything that later closes peer->sd again could hit an
                 * unrelated descriptor that reused the number */
                peer->sd = -1;
                return ORTE_ERR_UNREACH;
            }
            /* transient condition - try again */
            continue;
        }
        cnt += retval;
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s blocking send complete to socket %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd);

    return ORTE_SUCCESS;
}
/*
 * Close the SCTP BTL component.
 *
 * Frees the interface include/exclude strings and the BTL module array,
 * releases the receive handler's buffer, removes the listen-socket event and
 * closes the listen socket, deletes and releases every entry still on the
 * sctp_events list, and finally frees/destructs the remaining component
 * state.
 *
 * Always returns OMPI_SUCCESS.
 */
int mca_btl_sctp_component_close(void)
{
    opal_list_item_t* item;
    opal_list_item_t* next;

    /* free(NULL) is a no-op, so no guards are needed.  Pointers are reset
     * after being freed so that no dangling pointer is left behind in the
     * component structure. */
    free(mca_btl_sctp_component.sctp_if_include);
    mca_btl_sctp_component.sctp_if_include = NULL;

    free(mca_btl_sctp_component.sctp_if_exclude);
    mca_btl_sctp_component.sctp_if_exclude = NULL;

    free(mca_btl_sctp_component.sctp_btls);
    mca_btl_sctp_component.sctp_btls = NULL;

    mca_btl_sctp_recv_handler_freebuf();

    /* stop listening: drop the event before closing its descriptor */
    if (mca_btl_sctp_component.sctp_listen_sd >= 0) {
        opal_event_del(&mca_btl_sctp_component.sctp_recv_event);
        CLOSE_THE_SOCKET(mca_btl_sctp_component.sctp_listen_sd);
        mca_btl_sctp_component.sctp_listen_sd = -1;
    }

    /* cleanup any pending events */
    OPAL_THREAD_LOCK(&mca_btl_sctp_component.sctp_lock);
    for(item = opal_list_get_first(&mca_btl_sctp_component.sctp_events);
        item != opal_list_get_end(&mca_btl_sctp_component.sctp_events);
        item = next) {
        mca_btl_sctp_event_t* event = (mca_btl_sctp_event_t*)item;
        /* grab the successor before the current item is released */
        next = opal_list_get_next(item);
        opal_event_del(&event->event);
        OBJ_RELEASE(event);
    }
    OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);

    /* release resources */
    OBJ_DESTRUCT(&mca_btl_sctp_component.sctp_procs);
#if MCA_BTL_SCTP_DONT_USE_HASH
    free(recvr_proc_table);
    recvr_proc_table = NULL;
    free(sender_proc_table);
    sender_proc_table = NULL;
#else
    OBJ_DESTRUCT(&mca_btl_sctp_component.sctp_assocID_hash);
#endif
    OBJ_DESTRUCT(&mca_btl_sctp_component.sctp_events);
    OBJ_DESTRUCT(&mca_btl_sctp_component.sctp_frag_eager);
    OBJ_DESTRUCT(&mca_btl_sctp_component.sctp_frag_max);
    OBJ_DESTRUCT(&mca_btl_sctp_component.sctp_frag_user);
    OBJ_DESTRUCT(&mca_btl_sctp_component.sctp_lock);

#ifdef __WINDOWS__
    WSACleanup();
#endif

    return OMPI_SUCCESS;
}
示例#7
0
/*
 * Event callback when there is data available on the registered
 * socket to recv.  This is called for the listen sockets to accept an
 * incoming connection, on new sockets trying to complete the software
 * connection process, and for probes.  Data on an established
 * connection is handled elsewhere.
 *
 * sd     - socket the handshake is read from
 * flg    - event flags (unused)
 * cbdata - the mca_oob_tcp_conn_op_t for this connection attempt; it is
 *          released before returning on every path
 */
static void recv_handler(int sd, short flg, void *cbdata)
{
    mca_oob_tcp_conn_op_t *op = (mca_oob_tcp_conn_op_t*)cbdata;
    int flags;
    uint64_t *ui64;
    mca_oob_tcp_hdr_t hdr;
    mca_oob_tcp_peer_t *peer;

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s:tcp:recv:handler called",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* get the handshake */
    if (ORTE_SUCCESS != mca_oob_tcp_peer_recv_connect_ack(NULL, sd, &hdr)) {
        goto cleanup;
    }

    /* finish processing ident */
    if (MCA_OOB_TCP_IDENT == hdr.type) {
        if (NULL == (peer = mca_oob_tcp_peer_lookup(&hdr.origin))) {
            /* should never happen.  There is no peer object to close here:
             * the pointer is NULL on this branch, so it must not be handed
             * to mca_oob_tcp_peer_close().
             * NOTE(review): sd is left open on this path - confirm whether
             * it should be closed here. */
            goto cleanup;
        }
        /* set socket up to be non-blocking; a failure is only logged */
        if ((flags = fcntl(sd, F_GETFL, 0)) < 0) {
            opal_output(0, "%s mca_oob_tcp_recv_connect: fcntl(F_GETFL) failed: %s (%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
        } else {
            flags |= O_NONBLOCK;
            if (fcntl(sd, F_SETFL, flags) < 0) {
                opal_output(0, "%s mca_oob_tcp_recv_connect: fcntl(F_SETFL) failed: %s (%d)",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
            }
        }

        /* is the peer instance willing to accept this connection */
        peer->sd = sd;
        if (mca_oob_tcp_peer_accept(peer) == false) {
            if (OOB_TCP_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
                opal_output(0, "%s-%s mca_oob_tcp_recv_connect: "
                            "rejected connection from %s connection state %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)),
                            ORTE_NAME_PRINT(&(hdr.origin)),
                            peer->state);
            }
            /* rejected: close the socket, clear the peer's entry in the
             * module's peer table (keyed by the peer name read as a
             * 64-bit value) and release the peer */
            CLOSE_THE_SOCKET(sd);
            ui64 = (uint64_t*)(&peer->name);
            opal_hash_table_set_value_uint64(&mca_oob_tcp_module.peers, (*ui64), NULL);
            OBJ_RELEASE(peer);
        }
    }

 cleanup:
    OBJ_RELEASE(op);
}
示例#8
0
/*
 * Destructor for a listener: closes its socket when the descriptor is valid
 * and frees the varname and uri strings.
 */
static void ldes(pmix_listener_t *p)
{
    if (p->socket >= 0) {
        CLOSE_THE_SOCKET(p->socket);
    }

    /* free() accepts NULL, so unset strings need no special handling */
    free(p->varname);
    free(p->uri);
}
示例#9
0
/*
 * Close an endpoint's connection.
 *
 * When the endpoint holds a valid socket this marks the endpoint closed,
 * counts one more retry, removes the recv/send event registrations, closes
 * the socket and (when endpoint caching is compiled in) discards the cache.
 * An endpoint without a socket is left untouched.
 */
void mca_btl_tcp2_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
{
    if (btl_endpoint->endpoint_sd >= 0) {
        btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
        btl_endpoint->endpoint_retries++;

        /* unregister both events before the descriptor goes away */
        opal_event_del(&btl_endpoint->endpoint_recv_event);
        opal_event_del(&btl_endpoint->endpoint_send_event);

        CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
        btl_endpoint->endpoint_sd = -1;

#if MCA_BTL_TCP_ENDPOINT_CACHE
        free( btl_endpoint->endpoint_cache );
        btl_endpoint->endpoint_cache_length = 0;
        btl_endpoint->endpoint_cache_pos    = NULL;
        btl_endpoint->endpoint_cache        = NULL;
#endif  /* MCA_BTL_TCP_ENDPOINT_CACHE */
    }
}
示例#10
0
/*
 * Shut down the connection to a peer.
 *
 * Each call counts as one retry.  Once the retry count (as it was before
 * this call) exceeds the component's limit, the in-flight message and every
 * queued message are completed with ORTE_ERR_UNREACH.  In all cases the
 * socket (if open) and its events are torn down, the timer event is removed
 * and the peer ends up in state MCA_OOB_TCP_CLOSED.
 */
void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer)
{
    /* too many attempts: give up and flush whatever is still pending */
    if(peer->peer_retries++ > mca_oob_tcp_component.tcp_peer_retries) {
        mca_oob_tcp_msg_t *pending;

        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_shutdown: retries exceeded",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(&(peer->peer_name)));

        /* During initial connection setup peer_send_msg can be NULL while
           the queue is non-empty, so the current message and the queue are
           handled separately. */
        pending = peer->peer_send_msg;
        if (NULL != pending) {
            pending->msg_complete = true;
            pending->msg_rc = ORTE_ERR_UNREACH;
            mca_oob_tcp_msg_complete(pending, &peer->peer_name);
        }
        peer->peer_send_msg = NULL;

        for (pending = (mca_oob_tcp_msg_t*)opal_list_remove_first(&peer->peer_send_queue);
             NULL != pending;
             pending = (mca_oob_tcp_msg_t*)opal_list_remove_first(&peer->peer_send_queue)) {
            pending->msg_complete = true;
            pending->msg_rc = ORTE_ERR_UNREACH;
            mca_oob_tcp_msg_complete(pending, &peer->peer_name);
        }

        /* The connection could not be established and is unlikely to
           start working, so mark the peer as failed (the state is
           overwritten with CLOSED at the end of this function). */
        peer->peer_state = MCA_OOB_TCP_FAILED;
    }

    /* tear down the socket and the events bound to it */
    if (0 <= peer->peer_sd) {
        opal_event_del(&peer->peer_recv_event);
        opal_event_del(&peer->peer_send_event);
        CLOSE_THE_SOCKET(peer->peer_sd);
        peer->peer_sd = -1;
    }

    opal_event_del(&peer->peer_timer_event);
    peer->peer_state = MCA_OOB_TCP_CLOSED;
}
示例#11
0
/*
 * Close the endpoint's socket and remove any event registrations
 * associated with it.
 *
 * The descriptor is claimed by atomically swapping endpoint_sd to -1, so
 * that when several callers race only the one whose swap succeeds goes on
 * to close the socket; everybody else sees a negative descriptor and
 * returns.  The claiming caller then closes the socket, counts a retry,
 * removes the recv/send events and (when endpoint caching is compiled in)
 * discards the cache.
 */
void mca_btl_tcp2_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
{
    int sd;

    /* Re-read the descriptor on every attempt and keep trying until our
     * compare-and-set is the one that replaces it with -1.
     * Assumes opal_atomic_cmpset() returns non-zero when the swap took
     * place.  The previous loop ran *while* the swap succeeded and never
     * reloaded sd, so a caller that lost the race still fell through and
     * closed the descriptor a second time. */
    do {
        sd = btl_endpoint->endpoint_sd;
        if( sd < 0 ) return;
    } while ( !opal_atomic_cmpset( &(btl_endpoint->endpoint_sd), sd, -1 ) );

    CLOSE_THE_SOCKET(sd);
    btl_endpoint->endpoint_retries++;
    opal_event_del(&btl_endpoint->endpoint_recv_event);
    opal_event_del(&btl_endpoint->endpoint_send_event);
#if MCA_BTL_TCP_ENDPOINT_CACHE
    /* free(NULL) is a no-op, so no guard is needed */
    free( btl_endpoint->endpoint_cache );
    btl_endpoint->endpoint_cache        = NULL;
    btl_endpoint->endpoint_cache_pos    = NULL;
    btl_endpoint->endpoint_cache_length = 0;
#endif  /* MCA_BTL_TCP_ENDPOINT_CACHE */
}
示例#12
0
/*
 * Destructor for a TCP OOB peer: removes whichever of the send, recv and
 * timer events are flagged active, closes the socket if it is open (logging
 * the descriptor at verbosity 2) and destructs the address and send-queue
 * lists.
 */
static void peer_des(mca_oob_tcp_peer_t *peer)
{
    /* only events flagged as active are removed */
    if (peer->send_ev_active) {
        opal_event_del(&peer->send_event);
    }
    if (peer->recv_ev_active) {
        opal_event_del(&peer->recv_event);
    }
    if (peer->timer_ev_active) {
        opal_event_del(&peer->timer_event);
    }

    if (peer->sd >= 0) {
        opal_output_verbose(2, orte_oob_base_framework.framework_output,
                            "%s CLOSING SOCKET %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            peer->sd);
        CLOSE_THE_SOCKET(peer->sd);
    }

    OPAL_LIST_DESTRUCT(&peer->addrs);
    OPAL_LIST_DESTRUCT(&peer->send_queue);
}
示例#13
0
/*
 * Offer an incoming connection to the first endpoint of the proc whose
 * address matches the connection's source address.
 *
 * Endpoints are scanned in order under the proc lock.  An endpoint is a
 * candidate when its address family equals that of `addr` and, for AF_INET
 * (and AF_INET6 when IPv6 support is compiled in), its stored address equals
 * the source address.  The first candidate gets the socket via
 * mca_btl_tcp_endpoint_accept() (whose result is ignored) and the scan
 * stops.  If no endpoint qualifies, the socket is closed.
 */
void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr, int sd)
{
    size_t idx;

    OPAL_THREAD_LOCK(&btl_proc->proc_lock);
    for( idx = 0; idx < btl_proc->proc_endpoint_count; idx++ ) {
        mca_btl_base_endpoint_t* candidate = btl_proc->proc_endpoints[idx];

        /* the address families have to agree before anything else */
        if( candidate->endpoint_addr->addr_family != addr->sa_family ) {
            continue;
        }

        /* for the known families the addresses themselves must match;
         * any other family is accepted on the family match alone */
        if( AF_INET == addr->sa_family ) {
            if( 0 != memcmp( &candidate->endpoint_addr->addr_inet,
                             &(((struct sockaddr_in*)addr)->sin_addr),
                             sizeof(struct in_addr) ) ) {
                continue;
            }
        }
#if OPAL_ENABLE_IPV6
        else if( AF_INET6 == addr->sa_family ) {
            if( 0 != memcmp( &candidate->endpoint_addr->addr_inet,
                             &(((struct sockaddr_in6*)addr)->sin6_addr),
                             sizeof(struct in6_addr) ) ) {
                continue;
            }
        }
#endif

        (void)mca_btl_tcp_endpoint_accept(candidate, addr, sd);
        OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
        return;
    }
    OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);

    /* nobody took the socket, so it is of no further use */
    CLOSE_THE_SOCKET(sd);
}
示例#14
0
/*
 * Destructor for a peer: closes the socket if open, removes the send/recv
 * events that are flagged active, releases the rank info object, destructs
 * the send queue and releases any in-progress send and receive messages.
 */
static void pdes(pmix_peer_t *p)
{
    if (p->sd >= 0) {
        CLOSE_THE_SOCKET(p->sd);
    }

    /* only events flagged as active are removed */
    if (p->send_ev_active) {
        pmix_event_del(&p->send_event);
    }
    if (p->recv_ev_active) {
        pmix_event_del(&p->recv_event);
    }

    if (p->info != NULL) {
        PMIX_RELEASE(p->info);
    }

    /* queued messages first, then the ones currently in flight */
    PMIX_LIST_DESTRUCT(&p->send_queue);
    if (p->send_msg != NULL) {
        PMIX_RELEASE(p->send_msg);
    }
    if (p->recv_msg != NULL) {
        PMIX_RELEASE(p->recv_msg);
    }
}
示例#15
0
/*
 * Parse the component's server URI, connect to the server and run the
 * connection handshake.
 *
 * The URI is expected to start with a 7-character scheme prefix
 * (presumably "tcp4://" or "tcp6://") followed by "host:port".  A URI that
 * starts with "tcp4" is parsed as an IPv4 address, anything else as an IPv6
 * address, which may be wrapped in square brackets.
 *
 * sd - out: the connected socket on success
 *
 * Returns PMIX_SUCCESS when connected and the handshake completed,
 * PMIX_ERR_NOMEM / PMIX_ERR_BAD_PARAM when the URI cannot be parsed, or the
 * error reported by the connect / handshake step.  A handshake that fails
 * with PMIX_ERR_TEMP_UNAVAILABLE is retried once.  The socket is closed on
 * every handshake failure.
 */
static pmix_status_t try_connect(int *sd)
{
    const char *uri = mca_ptl_tcp_component.super.uri;
    char *p, *p2, *host;
    struct sockaddr_in *in;
    struct sockaddr_in6 *in6;
    size_t len;
    size_t hostlen;
    pmix_status_t rc;
    bool retried = false;

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "pmix:tcp try connect to %s",
                        mca_ptl_tcp_component.super.uri);

    /* mark that we are the active module for this server */
    pmix_client_globals.myserver->nptr->compat.ptl = &pmix_ptl_tcp_module;

    /* both branches below index the URI at offset 7, so it has to be at
     * least that long */
    if (NULL == uri || strlen(uri) < 7) {
        PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
        return PMIX_ERR_BAD_PARAM;
    }

    /* setup the path to the daemon rendezvous point */
    memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage));
    if (0 == strncmp(uri, "tcp4", 4)) {
        /* need to skip the tcp4: part */
        p = strdup(&uri[7]);
        if (NULL == p) {
            PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
            return PMIX_ERR_NOMEM;
        }

        /* separate the IP address from the port */
        p2 = strchr(p, ':');
        if (NULL == p2) {
            free(p);
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        p2++;
        host = p;
        /* load the address */
        in = (struct sockaddr_in*)&mca_ptl_tcp_component.connection;
        in->sin_family = AF_INET;
        in->sin_addr.s_addr = inet_addr(host);
        if (in->sin_addr.s_addr == INADDR_NONE) {
            free(p);
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            return PMIX_ERR_BAD_PARAM;
        }
        in->sin_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in);
    } else {
        /* need to skip the tcp6: part */
        p = strdup(&uri[7]);
        if (NULL == p) {
            PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
            return PMIX_ERR_NOMEM;
        }

        /* separate the IP address from the port.  An IPv6 address contains
         * colons itself, so the port separator is the LAST colon. */
        p2 = strrchr(p, ':');
        if (NULL == p2) {
            free(p);
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        /* step past the terminator so p2 points at the port digits */
        p2++;

        /* strip the optional [ ] around the address; the length check
         * keeps an empty host from indexing p[-1] */
        hostlen = strlen(p);
        if (0 < hostlen && ']' == p[hostlen-1]) {
            p[hostlen-1] = '\0';
        }
        if ('[' == p[0]) {
            host = &p[1];
        } else {
            host = &p[0];
        }
        /* load the address */
        in6 = (struct sockaddr_in6*)&mca_ptl_tcp_component.connection;
        in6->sin6_family = AF_INET6;
        /* inet_pton() returns 1 on success, 0 for an unparsable address
         * and -1 for an unsupported family */
        if (1 != inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) {
            pmix_output (0, "ptl_tcp_parse_uri: Could not convert %s\n", host);
            free(p);
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            return PMIX_ERR_BAD_PARAM;
        }
        in6->sin6_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in6);
    }
    free(p);

  retry:
    /* establish the connection */
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_tcp_component.connection, len, sd))) {
        /* do not error log - might just be a stale connection point */
        return rc;
    }

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(*sd))) {
        PMIX_ERROR_LOG(rc);
        CLOSE_THE_SOCKET(*sd);
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(*sd))) {
        CLOSE_THE_SOCKET(*sd);
        if (PMIX_ERR_TEMP_UNAVAILABLE == rc) {
            /* give it two tries */
            if (!retried) {
                retried = true;
                goto retry;
            }
        }
        PMIX_ERROR_LOG(rc);
        return rc;
    }

    return PMIX_SUCCESS;
}
/*
 * Finalize the PMIx runtime.
 *
 * The call is reference counted through pmix_initialized: only the call
 * that brings the counter to zero performs the teardown.  A call that
 * drives the counter negative prints a diagnostic to stderr and returns.
 */
void pmix_rte_finalize(void)
{
    if( 0 != --pmix_initialized ) {
        /* either still in use, or finalized more often than initialized */
        if( 0 > pmix_initialized ) {
            fprintf(stderr, "PMIx Finalize called too many times\n");
        }
        return;
    }

    /* the progress thread is only stopped when the event base is not an
       external one */
    if (!pmix_globals.external_evbase) {
        (void)pmix_progress_thread_finalize(NULL);
    }

    /* shut down communications; non-servers also close their server socket */
    pmix_usock_finalize();
    if (PMIX_PROC_SERVER != pmix_globals.proc_type &&
        pmix_client_globals.myserver.sd >= 0) {
        CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd);
    }
    #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
        pmix_dstore_finalize();
    #endif /* PMIX_ENABLE_DSTORE */

    /* security framework */
    (void)pmix_mca_base_framework_close(&pmix_psec_base_framework);

    /* drop every registered MCA parameter */
    pmix_deregister_params();
    pmix_mca_base_var_finalize();

    /* lex-based keyval parser */
    pmix_util_keyval_parse_finalize();

    (void)pmix_mca_base_framework_close(&pmix_pinstalldirs_base_framework);

    /* show_help system */
    pmix_show_help_finalize();

    /* The output system is finalized last among the utilities; the
       original note says it must come *after* the malloc code, which
       calls into it. */
    pmix_output_finalize();

#if 0
    /* close the bfrops */
    (void)pmix_mca_base_framework_close(&pmix_bfrops_base_framework);
#endif

    /* finally, empty the globals */
    PMIX_RELEASE(pmix_globals.mypeer);
    PMIX_LIST_DESTRUCT(&pmix_globals.nspaces);
    if (pmix_globals.cache_local != NULL) {
        PMIX_RELEASE(pmix_globals.cache_local);
    }
    if (pmix_globals.cache_remote != NULL) {
        PMIX_RELEASE(pmix_globals.cache_remote);
    }
    PMIX_DESTRUCT(&pmix_globals.events);

    #if PMIX_NO_LIB_DESTRUCTOR
        pmix_cleanup();
    #endif
}
示例#17
0
文件: ptl_usock.c 项目: dycz0fx/ompi
/*
 * Connect this client to its server over a Unix-domain socket.
 *
 * The rendezvous point is taken from the environment: PMIX_SERVER_URI2USOCK
 * selects the "v21" bfrops module, otherwise PMIX_SERVER_URI selects "v12".
 * The value must have exactly three ':'-separated fields, used as server
 * nspace, server rank and socket path.  After connecting, the identity /
 * handshake exchange is run (retried once on PMIX_ERR_TEMP_UNAVAILABLE),
 * the socket is made non-blocking and the recv event is activated; the send
 * event is prepared but left inactive.
 *
 * peer, info, ninfo - not used by this implementation
 *
 * Returns PMIX_SUCCESS on success, PMIX_ERR_NOT_SUPPORTED when we are not a
 * client, PMIX_ERR_SERVER_NOT_AVAIL when neither variable is set,
 * PMIX_ERR_INIT when no bfrops module could be assigned, PMIX_ERROR for a
 * malformed URI, PMIX_ERR_NOT_FOUND when the rendezvous file is not
 * readable, or the error of the connect / handshake step.
 */
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    struct sockaddr_un *address;
    char *evar, **uri;
    pmix_status_t rc;
    int sd;
    pmix_socklen_t len;
    bool retried = false;

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "[%s:%d] connect to server",
                        __FILE__, __LINE__);

    /* if we are not a client, there is nothing we can do */
    if (!PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* if we don't have a path to the daemon rendezvous point,
     * then we need to return an error */
    if (NULL != (evar = getenv("PMIX_SERVER_URI2USOCK"))) {
        /* this is a v2.1+ server */
        pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v21");
        if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
            return PMIX_ERR_INIT;
        }
    } else if (NULL != (evar = getenv("PMIX_SERVER_URI"))) {
        /* this is a pre-v2.1 server - must use the v12 bfrops module */
        pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v12");
        if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
            return PMIX_ERR_INIT;
        }
    } else {
        /* let the caller know that the server isn't available */
        return PMIX_ERR_SERVER_NOT_AVAIL;
    }
    /* mark that we are using the V1 protocol */
    pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V1;

    uri = pmix_argv_split(evar, ':');
    if (3 != pmix_argv_count(uri)) {
        pmix_argv_free(uri);
        PMIX_ERROR_LOG(PMIX_ERROR);
        return PMIX_ERROR;
    }
    /* set the server nspace */
    if (NULL == pmix_client_globals.myserver->info) {
        pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr) {
        pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t);
    }
    /* the server will be using the same bfrops as us.  This is done only
     * now, after the server's nptr is known to exist - it must not be
     * dereferenced before the check above has had a chance to create it. */
    pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops;
    if (NULL == pmix_client_globals.myserver->nptr->nspace) {
        pmix_client_globals.myserver->nptr->nspace = strdup(uri[0]);
    }
    if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_client_globals.myserver->info->pname.nspace = strdup(uri[0]);
    }

    /* set the server rank */
    pmix_client_globals.myserver->info->pname.rank = strtoull(uri[1], NULL, 10);

    /* setup the path to the daemon rendezvous point; the storage was just
     * zeroed and at most size-1 bytes are written, so the path stays
     * NUL-terminated */
    memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage));
    address = (struct sockaddr_un*)&mca_ptl_usock_component.connection;
    address->sun_family = AF_UNIX;
    snprintf(address->sun_path, sizeof(address->sun_path)-1, "%s", uri[2]);
    /* if the rendezvous file doesn't exist, that's an error */
    if (0 != access(uri[2], R_OK)) {
        pmix_argv_free(uri);
        PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND);
        return PMIX_ERR_NOT_FOUND;
    }
    pmix_argv_free(uri);

  retry:
    /* establish the connection */
    len = sizeof(struct sockaddr_un);
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_usock_component.connection, len, &sd))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver->sd = sd;

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        /* the server object must not keep a descriptor that is closed */
        pmix_client_globals.myserver->sd = -1;
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        /* the server object must not keep a descriptor that is closed */
        pmix_client_globals.myserver->sd = -1;
        if (PMIX_ERR_TEMP_UNAVAILABLE == rc) {
            /* give it two tries */
            if (!retried) {
                retried = true;
                goto retry;
            }
        }
        return rc;
    }

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event; it is added exactly once, after the object has
     * been posted */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_usock_recv_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event; it is only assigned here, not added */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_usock_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}
示例#18
0
/*
 * Create and configure a non-blocking UDP multicast socket for a channel.
 *
 * sd         - out: the configured socket; written only on success
 * chan       - channel supplying the multicast network, port and interface
 * recvsocket - true:  bind to the channel address, join the multicast group
 *                     and set the receive buffer size
 *              false: select the outgoing multicast interface and set the
 *                     send buffer size
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_IN_ERRNO when socket() or the
 * multicast TTL option fails, ORTE_ERROR for every other failure.  Once the
 * socket has been created, every failure path closes it before returning.
 */
static int setup_socket(int *sd, rmcast_base_channel_t *chan, bool recvsocket)
{
    uint8_t ttl = 1;
    struct sockaddr_in inaddr;
    struct ip_mreq req;
    int addrlen;
    int target_sd;
    int flags;

    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                         "setup:socket addr %03d.%03d.%03d.%03d port %d",
                         OPAL_IF_FORMAT_ADDR(chan->network), (int)chan->port));

    target_sd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if(target_sd < 0) {
        if (EAFNOSUPPORT != opal_socket_errno) {
            opal_output(0,"rmcast:init: socket() failed: %s (%d)",
                        strerror(opal_socket_errno), opal_socket_errno);
        }
        return ORTE_ERR_IN_ERRNO;
    }

    /* set the multicast flags */
    if ((setsockopt(target_sd, IPPROTO_IP, IP_MULTICAST_TTL,
                    (void *)&ttl, sizeof(ttl))) < 0) {
        opal_output(0,"rmcast:init: socketopt() failed on MULTICAST_TTL: %s (%d)",
                    strerror(opal_socket_errno), opal_socket_errno);
        /* do not leak the descriptor */
        CLOSE_THE_SOCKET(target_sd);
        return ORTE_ERR_IN_ERRNO;
    }

    /* enable port sharing */
    flags = 1;
    if (setsockopt (target_sd, SOL_SOCKET, SO_REUSEADDR, (const char *)&flags, sizeof(flags)) < 0) {
        opal_output(0, "rmcast:udp: unable to set the "
                    "SO_REUSEADDR option (%s:%d)\n",
                    strerror(opal_socket_errno), opal_socket_errno);
        CLOSE_THE_SOCKET(target_sd);
        return ORTE_ERROR;
    }

    /* if this is the recv side... */
    if (recvsocket) {
        memset(&inaddr, 0, sizeof(inaddr));
        inaddr.sin_family = AF_INET;
        inaddr.sin_addr.s_addr = htonl(chan->network);
        inaddr.sin_port = htons(chan->port);
        addrlen = sizeof(struct sockaddr_in);

        /* bind the socket */
        if (bind(target_sd, (struct sockaddr*)&inaddr, addrlen) < 0) {
            opal_output(0, "%s rmcast:init: bind() failed for addr %03d.%03d.%03d.%03d port %d\n\tError: %s (%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), OPAL_IF_FORMAT_ADDR(chan->network), (int)chan->port,
                        strerror(opal_socket_errno), opal_socket_errno);
            CLOSE_THE_SOCKET(target_sd);
            return ORTE_ERROR;
        }
        /* set membership on the multicast interface */
        memset(&req, 0, sizeof (req));
        req.imr_multiaddr.s_addr = htonl(chan->network);
        req.imr_interface.s_addr = htonl(chan->interface);

        OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                             "setup:socket:membership addr %03d.%03d.%03d.%03d interface %03d.%03d.%03d.%03d",
                             OPAL_IF_FORMAT_ADDR(chan->network), OPAL_IF_FORMAT_ADDR(chan->interface)));

        if ((setsockopt(target_sd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
                        (void *)&req, sizeof (req))) < 0) {
            opal_output(0, "%s rmcast:init: setsockopt() failed on ADD_MEMBERSHIP\n"
                        "\tfor multicast network %03d.%03d.%03d.%03d interface %03d.%03d.%03d.%03d\n\tError: %s (%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        OPAL_IF_FORMAT_ADDR(chan->network), OPAL_IF_FORMAT_ADDR(chan->interface),
                        strerror(opal_socket_errno), opal_socket_errno);
            CLOSE_THE_SOCKET(target_sd);
            return ORTE_ERROR;
        }
        /* set the recvbuf size */
        if ((setsockopt(target_sd, SOL_SOCKET, SO_RCVBUF,
                        &orte_rmcast_udp_rcvbuf_size, sizeof(orte_rmcast_udp_rcvbuf_size))) < 0) {
            opal_output(0, "%s rmcast:init: setsockopt() failed on SO_RCVBUF\n"
                        "\tfor multicast network %03d.%03d.%03d.%03d interface %03d.%03d.%03d.%03d\n\tError: %s (%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        OPAL_IF_FORMAT_ADDR(chan->network), OPAL_IF_FORMAT_ADDR(chan->interface),
                        strerror(opal_socket_errno), opal_socket_errno);
            CLOSE_THE_SOCKET(target_sd);
            return ORTE_ERROR;
        }
    } else {
        /* on the xmit side, need to set the interface */
        memset(&inaddr, 0, sizeof(inaddr));
        inaddr.sin_addr.s_addr = htonl(chan->interface);
        addrlen = sizeof(struct sockaddr_in);

        OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                             "setup:socket:xmit interface %03d.%03d.%03d.%03d",
                             OPAL_IF_FORMAT_ADDR(chan->interface)));

        /* NOTE(review): a whole struct sockaddr_in is handed to
         * IP_MULTICAST_IF here; the option is conventionally given a
         * struct in_addr (or ip_mreq/ip_mreqn) - confirm this is intended. */
        if ((setsockopt(target_sd, IPPROTO_IP, IP_MULTICAST_IF,
                        (void *)&inaddr, addrlen)) < 0) {
            opal_output(0, "%s rmcast:init: setsockopt() failed on MULTICAST_IF\n"
                        "\tfor multicast network %03d.%03d.%03d.%03d interface %03d.%03d.%03d.%03d\n\tError: %s (%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        OPAL_IF_FORMAT_ADDR(chan->network), OPAL_IF_FORMAT_ADDR(chan->interface),
                        strerror(opal_socket_errno), opal_socket_errno);
            CLOSE_THE_SOCKET(target_sd);
            return ORTE_ERROR;
        }
        /* set the sendbuf size */
        if ((setsockopt(target_sd, SOL_SOCKET, SO_SNDBUF,
                        &orte_rmcast_udp_sndbuf_size, sizeof(orte_rmcast_udp_sndbuf_size))) < 0) {
            opal_output(0, "%s rmcast:init: setsockopt() failed on SO_SNDBUF\n"
                        "\tfor multicast network %03d.%03d.%03d.%03d interface %03d.%03d.%03d.%03d\n\tError: %s (%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        OPAL_IF_FORMAT_ADDR(chan->network), OPAL_IF_FORMAT_ADDR(chan->interface),
                        strerror(opal_socket_errno), opal_socket_errno);
            CLOSE_THE_SOCKET(target_sd);
            return ORTE_ERROR;
        }
    }

    /* set socket up to be non-blocking */
    if((flags = fcntl(target_sd, F_GETFL, 0)) < 0) {
        opal_output(0, "rmcast:init: fcntl(F_GETFL) failed: %s (%d)",
                    strerror(opal_socket_errno), opal_socket_errno);
        /* do not leak the descriptor */
        CLOSE_THE_SOCKET(target_sd);
        return ORTE_ERROR;
    } else {
        flags |= O_NONBLOCK;
        if(fcntl(target_sd, F_SETFL, flags) < 0) {
            opal_output(0, "rmcast:init: fcntl(F_SETFL) failed: %s (%d)",
                        strerror(opal_socket_errno), opal_socket_errno);
            /* do not leak the descriptor */
            CLOSE_THE_SOCKET(target_sd);
            return ORTE_ERROR;
        }
    }

    /* return the socket */
    *sd = target_sd;

    return ORTE_SUCCESS;
}
示例#19
0
文件: ptl_usock.c 项目: karasevb/ompi
/*
 * Connect this client to its server over a Unix-domain socket.
 *
 * The rendezvous information is read from the PMIX_SERVER_URI environment
 * variable, which must split on ':' into exactly three fields: the server
 * nspace, the server rank, and the filesystem path of the rendezvous socket.
 *
 * The peer, info and ninfo arguments are not referenced by this function.
 *
 * Returns:
 *   PMIX_SUCCESS               connection made, handshake done, events set up
 *   PMIX_ERR_NOT_SUPPORTED     caller is not a client
 *   PMIX_ERR_SERVER_NOT_AVAIL  PMIX_SERVER_URI is not set
 *   PMIX_ERR_NOT_FOUND         the rendezvous file is not readable
 *   PMIX_ERROR                 malformed URI, allocation failure, or a path
 *                              that does not fit in sockaddr_un.sun_path
 *   otherwise the status returned by pmix_ptl_base_connect(),
 *   send_connect_ack() or recv_connect_ack()
 */
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    struct sockaddr_un *address;
    char *evar, **uri;
    pmix_status_t rc;
    int sd;
    int nprinted;
    pmix_socklen_t len;

    /* if we are not a client, there is nothing we can do */
    if (!PMIX_PROC_IS_CLIENT) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* if we don't have a path to the daemon rendezvous point,
     * then we need to return an error */
    if (NULL == (evar = getenv("PMIX_SERVER_URI"))) {
        /* let the caller know that the server isn't available */
        return PMIX_ERR_SERVER_NOT_AVAIL;
    }
    uri = pmix_argv_split(evar, ':');
    if (3 != pmix_argv_count(uri)) {
        pmix_argv_free(uri);
        return PMIX_ERROR;
    }

    /* create the server tracking objects on first use */
    if (NULL == pmix_client_globals.myserver->info) {
        pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr) {
        pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
    }
    /* do not dereference the objects below if either allocation failed */
    if (NULL == pmix_client_globals.myserver->info ||
        NULL == pmix_client_globals.myserver->nptr) {
        pmix_argv_free(uri);
        return PMIX_ERROR;
    }

    /* set the server nspace */
    if (NULL == pmix_client_globals.myserver->nptr->nspace) {
        pmix_client_globals.myserver->nptr->nspace = strdup(uri[0]);
    }
    if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_client_globals.myserver->info->pname.nspace = strdup(uri[0]);
    }
    if (NULL == pmix_client_globals.myserver->nptr->nspace ||
        NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_argv_free(uri);
        return PMIX_ERROR;
    }

    /* set the server rank */
    pmix_client_globals.myserver->info->pname.rank = strtoull(uri[1], NULL, 10);

    /* setup the path to the daemon rendezvous point */
    memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage));
    address = (struct sockaddr_un*)&mca_ptl_usock_component.connection;
    address->sun_family = AF_UNIX;
    nprinted = snprintf(address->sun_path, sizeof(address->sun_path)-1, "%s", uri[2]);
    /* if the rendezvous file doesn't exist, that's an error */
    if (0 != access(uri[2], R_OK)) {
        pmix_argv_free(uri);
        return PMIX_ERR_NOT_FOUND;
    }
    /* a truncated path would make us connect to a different file than the
     * one that was just checked, so refuse it instead */
    if (nprinted < 0 || (size_t)nprinted >= sizeof(address->sun_path)-1) {
        pmix_argv_free(uri);
        return PMIX_ERROR;
    }
    pmix_argv_free(uri);

    /* establish the connection */
    len = sizeof(struct sockaddr_un);
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_usock_component.connection, len, &sd))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver->sd = sd;

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        /* do not leave the closed descriptor recorded on the server object
         * (assumes -1 is the "no socket" value - TODO confirm) */
        pmix_client_globals.myserver->sd = -1;
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        pmix_client_globals.myserver->sd = -1;
        return rc;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event and activate it right away
     *
     * NOTE(review): myserver is dereferenced with '->' throughout this
     * function, so &pmix_client_globals.myserver is the address of the
     * pointer rather than of the peer object - confirm this is what
     * pmix_ptl_base_recv_handler / pmix_ptl_base_send_handler expect. */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_ptl_base_recv_handler, &pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);
    pmix_client_globals.myserver->recv_ev_active = true;

    /* setup send event - it is only assigned here, not added */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_ptl_base_send_handler, &pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}
示例#20
0
/*
 * Receive the server's connect-ack on the component socket and validate it.
 *
 * The ack consists of a pmix_usock_hdr_t followed by hdr.nbytes of payload.
 * The payload starts with a NUL-terminated version string; whatever follows
 * the terminator is handed to opal_sec.authenticate() as the credential.
 *
 * On success the component state becomes PMIX_USOCK_CONNECTED and, if a
 * message is waiting and the send event is not active, the send event is
 * added.
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE if the payload buffer cannot
 * be allocated, or OPAL_ERR_UNREACH for every other failure (recv failure,
 * wrong state, unexpected server id, malformed payload, version mismatch,
 * failed authentication).
 */
static int usock_recv_connect_ack(void)
{
    char *msg;
    char *version;
    int rc;
    char *cred;
    size_t credsize;
    pmix_usock_hdr_t hdr;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s RECV CONNECT ACK FROM SERVER ON SOCKET %d",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        mca_pmix_native_component.sd);

    /* ensure all is zero'd */
    memset(&hdr, 0, sizeof(pmix_usock_hdr_t));

    if (usock_recv_blocking((char*)&hdr, sizeof(pmix_usock_hdr_t))) {
        /* If the state is CONNECT_ACK, then we were waiting for
         * the connection to be ack'd
         */
        if (mca_pmix_native_component.state != PMIX_USOCK_CONNECT_ACK) {
            /* handshake broke down - abort this connection */
            opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM SERVER ON SOCKET %d",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        mca_pmix_native_component.sd);
            mca_pmix_native_component.state = PMIX_USOCK_FAILED;
            CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
            return OPAL_ERR_UNREACH;
        }
    } else {
        /* unable to complete the recv */
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s unable to complete recv of connect-ack from server ON SOCKET %d",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                            mca_pmix_native_component.sd);
        return OPAL_ERR_UNREACH;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack recvd from server",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* compare the servers name to the expected value */
    if (0 != opal_compare_proc(hdr.id, mca_pmix_native_component.server)) {
        opal_output(0, "usock_peer_recv_connect_ack: "
                    "%s received unexpected process identifier (%s) from server: expected (%s)",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                    OPAL_NAME_PRINT(hdr.id),
                    OPAL_NAME_PRINT(mca_pmix_native_component.server));
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return OPAL_ERR_UNREACH;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack header from server is okay",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* an empty payload cannot carry the version string we must verify, and
     * malloc(0) is allowed to return either NULL or an unusable pointer */
    if (0 == hdr.nbytes) {
        opal_output(0, "usock_peer_recv_connect_ack: "
                    "%s received empty connect-ack payload from server",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return OPAL_ERR_UNREACH;
    }

    /* get the authentication and version payload */
    if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    if (!usock_recv_blocking(msg, hdr.nbytes)) {
        /* unable to complete the recv */
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s unable to complete recv of connect-ack from server ON SOCKET %d",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                            mca_pmix_native_component.sd);
        free(msg);
        return OPAL_ERR_UNREACH;
    }

    /* the version string comes from the wire: make sure it is terminated
     * inside the buffer before strcmp()/strlen() walk it, otherwise they
     * would read past the allocation and credsize below could wrap around */
    if (NULL == memchr(msg, '\0', hdr.nbytes)) {
        opal_output(0, "usock_peer_recv_connect_ack: "
                    "%s received malformed connect-ack payload from server",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        free(msg);
        return OPAL_ERR_UNREACH;
    }

    /* check that this is from a matching version */
    version = (char*)(msg);
    if (0 != strcmp(version, opal_version_string)) {
        opal_output(0, "usock_peer_recv_connect_ack: "
                    "%s received different version from server: %s instead of %s",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                    version, opal_version_string);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        free(msg);
        return OPAL_ERR_UNREACH;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack version from server matches ours",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* check security token - it is whatever follows the version string's
     * terminator; the memchr() check above guarantees credsize cannot wrap */
    cred = (char*)(msg + strlen(version) + 1);
    credsize = hdr.nbytes - strlen(version) - 1;
    if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, NULL))) {
        OPAL_ERROR_LOG(rc);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        free(msg);
        return OPAL_ERR_UNREACH;
    }
    free(msg);

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack from server authenticated",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* connected */
    mca_pmix_native_component.state = PMIX_USOCK_CONNECTED;
    /* initiate send of first message on queue */
    if (NULL == mca_pmix_native_component.send_msg) {
        mca_pmix_native_component.send_msg = (pmix_usock_send_t*)
            opal_list_remove_first(&mca_pmix_native_component.send_queue);
    }
    if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) {
        opal_event_add(&mca_pmix_native_component.send_event, 0);
        mca_pmix_native_component.send_ev_active = true;
    }
    if (2 <= opal_output_get_verbosity(opal_pmix_base_framework.framework_output)) {
        pmix_usock_dump("connected");
    }
    return OPAL_SUCCESS;
}
示例#21
0
/*
 * Finish a connect() on the component socket.
 *
 * SO_ERROR is queried to learn how the connect ended. While it is still
 * in progress nothing is changed. On any failure the component is put in
 * PMIX_USOCK_FAILED and the socket is closed. On success our connect-ack is
 * sent, the state moves to PMIX_USOCK_CONNECT_ACK and the recv event is
 * added if it is not already active.
 */
static void usock_complete_connect(void)
{
    int sock_err = 0;
    opal_socklen_t sock_err_len = sizeof(sock_err);
    int rc;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s usock:complete_connect called for server on socket %d",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        mca_pmix_native_component.sd);

    /* ask the socket how the connect attempt ended */
    rc = getsockopt(mca_pmix_native_component.sd, SOL_SOCKET, SO_ERROR,
                    (char *)&sock_err, &sock_err_len);
    if (rc < 0) {
        opal_output(0, "%s usock_peer_complete_connect: getsockopt() to server failed: %s (%d)\n",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return;
    }

    switch (sock_err) {
    case 0:
        /* connected - carry on with the ack below */
        break;

    case EINPROGRESS:
        /* not finished yet - leave everything untouched */
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s usock:send:handler still in progress",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        return;

    case ECONNREFUSED:
    case ETIMEDOUT:
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s usock_peer_complete_connect: connection to server failed: %s (%d)",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                            strerror(sock_err),
                            sock_err);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return;

    default:
        /* any other error: only the numeric code is reported */
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s usock_peer_complete_connect: "
                            "connection to server failed with error %d",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                            sock_err);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s usock_peer_complete_connect: sending ack to server",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (OPAL_SUCCESS != usock_send_connect_ack()) {
        opal_output(0, "%s usock_complete_connect: unable to send connect ack to server",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return;
    }

    /* our ack is out - now wait for the server's answer */
    mca_pmix_native_component.state = PMIX_USOCK_CONNECT_ACK;
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s usock_peer_complete_connect: setting read event on connection to server",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (mca_pmix_native_component.recv_ev_active) {
        return;
    }
    opal_event_add(&mca_pmix_native_component.recv_event, 0);
    mca_pmix_native_component.recv_ev_active = true;
}
示例#22
0
/*
 * Handle incoming data from the server, dispatching on the component's
 * connection state.
 *
 * PMIX_USOCK_CONNECT_ACK: complete the handshake via usock_recv_connect_ack();
 *   on success make sure the recv event is active, cancel the timer event,
 *   start sending any queued message and mark the connection CONNECTED.
 * PMIX_USOCK_CONNECTED: read a message (header first, then hdr.nbytes of
 *   data) into mca_pmix_native_component.recv_msg, resuming where a previous
 *   call left off, and post it for delivery once complete.
 * Any other state: report it and close the socket.
 *
 * The sd, flags and cbdata arguments are not referenced; the socket used is
 * mca_pmix_native_component.sd.
 */
void pmix_usock_recv_handler(int sd, short flags, void *cbdata)
{
    int rc;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s usock:recv:handler called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    switch (mca_pmix_native_component.state) {
    case PMIX_USOCK_CONNECT_ACK:
        if (OPAL_SUCCESS == (rc = usock_recv_connect_ack())) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s usock:recv:handler starting send/recv events",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
            /* we connected! Start the send/recv events */
            if (!mca_pmix_native_component.recv_ev_active) {
                opal_event_add(&mca_pmix_native_component.recv_event, 0);
                mca_pmix_native_component.recv_ev_active = true;
            }
            if (mca_pmix_native_component.timer_ev_active) {
                opal_event_del(&mca_pmix_native_component.timer_event);
                mca_pmix_native_component.timer_ev_active = false;
            }
            /* if there is a message waiting to be sent, queue it */
            if (NULL == mca_pmix_native_component.send_msg) {
                mca_pmix_native_component.send_msg = (pmix_usock_send_t*)opal_list_remove_first(&mca_pmix_native_component.send_queue);
            }
            if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) {
                opal_event_add(&mca_pmix_native_component.send_event, 0);
                mca_pmix_native_component.send_ev_active = true;
            }
            /* update our state */
            mca_pmix_native_component.state = PMIX_USOCK_CONNECTED;
        } else {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s UNABLE TO COMPLETE CONNECT ACK WITH SERVER",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
            opal_event_del(&mca_pmix_native_component.recv_event);
            mca_pmix_native_component.recv_ev_active = false;
            return;
        }
        break;
    case PMIX_USOCK_CONNECTED:
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s usock:recv:handler CONNECTED",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        /* allocate a new message and setup for recv */
        if (NULL == mca_pmix_native_component.recv_msg) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s usock:recv:handler allocate new recv msg",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
            mca_pmix_native_component.recv_msg = OBJ_NEW(pmix_usock_recv_t);
            if (NULL == mca_pmix_native_component.recv_msg) {
                opal_output(0, "%s usock_recv_handler: unable to allocate recv message\n",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                return;
            }
            /* start by reading the header */
            mca_pmix_native_component.recv_msg->rdptr = (char*)&mca_pmix_native_component.recv_msg->hdr;
            mca_pmix_native_component.recv_msg->rdbytes = sizeof(pmix_usock_hdr_t);
        }
        /* if the header hasn't been completely read, read it */
        if (!mca_pmix_native_component.recv_msg->hdr_recvd) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "usock:recv:handler read hdr");
            if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) {
                /* completed reading the header */
                mca_pmix_native_component.recv_msg->hdr_recvd = true;
                /* if this is a zero-byte message, then we are done */
                if (0 == mca_pmix_native_component.recv_msg->hdr.nbytes) {
                    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                        "%s RECVD ZERO-BYTE MESSAGE FROM SERVER for tag %d",
                                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                        mca_pmix_native_component.recv_msg->hdr.tag);
                    mca_pmix_native_component.recv_msg->data = NULL;  // make sure
                    mca_pmix_native_component.recv_msg->rdptr = NULL;
                    mca_pmix_native_component.recv_msg->rdbytes = 0;
                } else {
                    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                        "%s usock:recv:handler allocate data region of size %lu",
                                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                        (unsigned long)mca_pmix_native_component.recv_msg->hdr.nbytes);
                    /* allocate the data region */
                    mca_pmix_native_component.recv_msg->data = (char*)malloc(mca_pmix_native_component.recv_msg->hdr.nbytes);
                    if (NULL == mca_pmix_native_component.recv_msg->data) {
                        /* without a buffer the payload cannot be consumed, so
                         * the stream cannot be kept in sync - give up on the
                         * connection rather than read into a NULL pointer */
                        opal_output(0, "%s usock_recv_handler: unable to allocate data region\n",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                        if (mca_pmix_native_component.recv_ev_active) {
                            opal_event_del(&mca_pmix_native_component.recv_event);
                            mca_pmix_native_component.recv_ev_active = false;
                        }
                        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
                        return;
                    }
                    /* point to it */
                    mca_pmix_native_component.recv_msg->rdptr = mca_pmix_native_component.recv_msg->data;
                    mca_pmix_native_component.recv_msg->rdbytes = mca_pmix_native_component.recv_msg->hdr.nbytes;
                }
                /* fall thru and attempt to read the data */
            } else if (OPAL_ERR_RESOURCE_BUSY == rc ||
                       OPAL_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                /* close the connection */
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s usock:recv:handler error reading bytes - closing connection",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                /* turn off the recv event before closing, as the data-read
                 * error path below does, so no event stays registered on a
                 * closed descriptor */
                if (mca_pmix_native_component.recv_ev_active) {
                    opal_event_del(&mca_pmix_native_component.recv_event);
                    mca_pmix_native_component.recv_ev_active = false;
                }
                CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
                return;
            }
        }

        if (mca_pmix_native_component.recv_msg->hdr_recvd) {
            /* continue to read the data block - we start from
             * wherever we left off, which could be at the
             * beginning or somewhere in the message
             */
            if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) {
                /* we recvd all of the message */
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                    (int)mca_pmix_native_component.recv_msg->hdr.nbytes,
                                    mca_pmix_native_component.recv_msg->hdr.tag);
                /* post it for delivery, then clear the slot so the next
                 * call allocates a fresh message */
                PMIX_ACTIVATE_POST_MSG(mca_pmix_native_component.recv_msg);
                mca_pmix_native_component.recv_msg = NULL;
            } else if (OPAL_ERR_RESOURCE_BUSY == rc ||
                       OPAL_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                // report the error
                opal_output(0, "%s usock_peer_recv_handler: unable to recv message",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                /* turn off the recv event */
                opal_event_del(&mca_pmix_native_component.recv_event);
                mca_pmix_native_component.recv_ev_active = false;
                CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
                return;
            }
        }
        break;
    default: 
        opal_output(0, "%s usock_peer_recv_handler: invalid socket state(%d)",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), 
                    mca_pmix_native_component.state);
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        break;
    }
}
示例#23
0
/*
 * Receive exactly `size` bytes into `data` from the component socket,
 * retrying until everything has arrived even though the socket is
 * non-blocking. Used for the small amount of connection information that
 * identifies the peer's endpoint.
 *
 * Returns true once all bytes were received. Returns false if the server
 * closed the connection (state becomes PMIX_USOCK_CLOSED, socket closed) or
 * recv() failed with something other than EINTR/EAGAIN/EWOULDBLOCK.
 */
static bool usock_recv_blocking(char *data, size_t size)
{
    size_t nrecvd = 0;
    int rc;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s waiting for connect ack from server",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    while (nrecvd < size) {
        rc = recv(mca_pmix_native_component.sd, (char *)data + nrecvd, size - nrecvd, 0);

        /* got some bytes - account for them and keep going */
        if (0 < rc) {
            nrecvd += rc;
            continue;
        }

        /* remote closed connection */
        if (0 == rc) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s usock_recv_blocking: server closed connection: state %d",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                mca_pmix_native_component.state);
            mca_pmix_native_component.state = PMIX_USOCK_CLOSED;
            CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
            return false;
        }

        /* rc < 0: the socket is non-blocking, so these just mean "try again" */
        if (EINTR == opal_socket_errno ||
            EAGAIN == opal_socket_errno ||
            EWOULDBLOCK == opal_socket_errno) {
            continue;
        }

        if (PMIX_USOCK_CONNECT_ACK == mca_pmix_native_component.state) {
            /* The listen backlog on the remote side may have overflowed:
               the three way handshake completed, yet the remote host could
               not move the connection from half connected (initial SYN
               received) to fully connected (in the listen backlog). That
               tends to surface only at the first receive, as a RST packet
               that recv reports as connection reset by peer. Leave the
               socket in CONNECT_ACK and hand the error up to
               recv_connect_ack, which will try to establish the connection
               again */
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s connect ack received error %s from server",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                strerror(opal_socket_errno));
            return false;
        }

        /* hard failure in any other state: give up on the connection */
        opal_output(0,
                    "%s usock_recv_blocking: "
                    "recv() failed for server: %s (%d)\n",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return false;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect ack received from server",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    return true;
}
示例#24
0
文件: ptl_tcp.c 项目: sjeaugey/ompi
/*
 * Connect to the server over TCP.
 *
 * The rendezvous URI is obtained according to the process type:
 *   - client: from the PMIX_SERVER_URI2 environment variable, formatted as
 *     "<nspace>.<rank>;<uri>";
 *   - tool: from an already-set component URI, else from the first line of
 *     "pmix-contact.txt" under the component tmpdir (same format).
 * The URI is then split into host and port. A URI starting with "tcp4" is
 * parsed as IPv4; anything else is parsed as IPv6 with optional [brackets]
 * around the address.
 *
 * The peer, info and ninfo arguments are not referenced by this function.
 *
 * Returns PMIX_SUCCESS once the socket is connected, the handshake is done
 * and the recv/send events are set up. Otherwise returns
 * PMIX_ERR_NOT_SUPPORTED, PMIX_ERR_UNREACH, PMIX_ERR_NOMEM or
 * PMIX_ERR_BAD_PARAM, or the status returned by pmix_ptl_base_connect(),
 * send_connect_ack() or recv_connect_ack().
 */
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    char *evar, **uri;
    char *filename, *host;
    FILE *fp;
    char *srvr, *p, *p2;
    struct sockaddr_in *in;
    struct sockaddr_in6 *in6;
    pmix_socklen_t len;
    int sd, rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "ptl:tcp: connecting to server");

    /* see if the connection info is in the info array - if
     * so, then that overrides all other options
     * (NOTE: nothing is implemented for this here - info/ninfo are unused) */


    /* if I am a client, then we need to look for the appropriate
     * connection info in the environment */
    if (PMIX_PROC_CLIENT == pmix_globals.proc_type) {
        if (NULL == (evar = getenv("PMIX_SERVER_URI2"))) {
            /* not us */
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* the URI consists of two elements:
        *    - server nspace.rank
        *    - ptl rendezvous URI
        */
        uri = pmix_argv_split(evar, ';');
        if (2 != pmix_argv_count(uri)) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* set the server nspace */
        p = uri[0];
        if (NULL == (p2 = strchr(p, '.'))) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }
        *p2 = '\0';
        ++p2;
        pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
        pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
        (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, p, PMIX_MAX_NSLEN);

        /* set the server rank */
        pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10);

        /* save the URI, but do not overwrite what we may have received from
         * the info-key directives */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            mca_ptl_tcp_component.super.uri = strdup(uri[1]);
        }
        pmix_argv_free(uri);

    } else if (PMIX_PROC_TOOL == pmix_globals.proc_type) {
        /* if we already have a URI, then look no further */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            /* we have to discover the connection info,
             * if possible. Start by looking for the connection
             * info in the expected place - if the server supports
             * tool connections via TCP, then there will be a
             * "contact.txt" file under the system tmpdir */
            filename = pmix_os_path(false, mca_ptl_tcp_component.tmpdir, "pmix-contact.txt", NULL);
            if (NULL == filename) {
                return PMIX_ERR_NOMEM;
            }
            fp = fopen(filename, "r");
            if (NULL == fp) {
                /* if we cannot open the file, then the server must not
                 * be configured to support tool connections - so abort */
                free(filename);
                return PMIX_ERR_UNREACH;
            }
            free(filename);
            /* get the URI */
            srvr = pmix_getline(fp);
            if (NULL == srvr) {
                PMIX_ERROR_LOG(PMIX_ERR_FILE_READ_FAILURE);
                fclose(fp);
                return PMIX_ERR_UNREACH;
            }
            fclose(fp);
            /* up to the first ';' is the server nspace/rank */
            if (NULL == (p = strchr(srvr, ';'))) {
                /* malformed */
                free(srvr);
                return PMIX_ERR_UNREACH;
            }
            *p = '\0';
            ++p;  // move past the semicolon
            /* the nspace is the section up to the '.' */
            if (NULL == (p2 = strchr(srvr, '.'))) {
                /* malformed */
                free(srvr);
                return PMIX_ERR_UNREACH;
            }
            *p2 = '\0';
            ++p2;
            /* set the server nspace */
            pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
            pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
            (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, srvr, PMIX_MAX_NSLEN);
            pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10);
            /* now save the uri itself */
            mca_ptl_tcp_component.super.uri = strdup(p);
            free(srvr);
        }
    }

    /* neither branch above may have produced a URI (other process type, or
     * strdup failure) - there is nothing for us to connect to in that case */
    if (NULL == mca_ptl_tcp_component.super.uri) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* mark that we are the active module for this server */
    pmix_client_globals.myserver.compat.ptl = &pmix_ptl_tcp_module;

    /* setup the path to the daemon rendezvous point */
    memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage));

    /* work on a private copy, since the URI is cut apart in place */
    p = strdup(mca_ptl_tcp_component.super.uri);
    if (NULL == p) {
        return PMIX_ERR_NOMEM;
    }
    /* the host is expected to start at offset 7 (presumably right after a
     * "tcpX://" prefix) - make sure that offset is inside the string */
    if (strlen(p) <= 7) {
        free(p);
        return PMIX_ERR_BAD_PARAM;
    }

    if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) {
        /* separate the IP address from the port */
        p2 = strchr(&p[7], ':');
        if (NULL == p2) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        ++p2;
        host = &p[7];
        /* load the address */
        in = (struct sockaddr_in*)&mca_ptl_tcp_component.connection;
        in->sin_family = AF_INET;
        in->sin_addr.s_addr = inet_addr(host);
        if (in->sin_addr.s_addr == INADDR_NONE) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        in->sin_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in);
    } else {
        /* separate the IP address from the port: an IPv6 address contains
         * ':' itself, so the port separator is the LAST colon */
        p2 = strrchr(&p[7], ':');
        if (NULL == p2) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        ++p2;  // move past the separator so p2 points at the port digits
        /* strip the optional [brackets] around the address */
        if (']' == p[strlen(p)-1]) {
            p[strlen(p)-1] = '\0';
        }
        if ('[' == p[7]) {
            host = &p[8];
        } else {
            host = &p[7];
        }
        /* load the address */
        in6 = (struct sockaddr_in6*)&mca_ptl_tcp_component.connection;
        in6->sin6_family = AF_INET6;
        /* inet_pton returns 1 on success, 0 for an unparsable address and
         * -1 for an unsupported family - only 1 is acceptable */
        if (1 != inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) {
            pmix_output (0, "ptl_tcp_parse_uri: Could not convert %s\n", host);
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        in6->sin6_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in6);
    }
    free(p);

    /* establish the connection */
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_tcp_component.connection, len, &sd))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver.sd = sd;

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        PMIX_ERROR_LOG(rc);
        CLOSE_THE_SOCKET(sd);
        /* do not leave the closed descriptor recorded on the server object
         * (assumes -1 is the "no socket" value - TODO confirm) */
        pmix_client_globals.myserver.sd = -1;
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        PMIX_ERROR_LOG(rc);
        CLOSE_THE_SOCKET(sd);
        pmix_client_globals.myserver.sd = -1;
        return rc;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event and activate it right away */
    pmix_event_assign(&pmix_client_globals.myserver.recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver.sd,
                      EV_READ | EV_PERSIST,
                      pmix_ptl_base_recv_handler, &pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver.recv_event, 0);
    pmix_client_globals.myserver.recv_ev_active = true;

    /* setup send event - it is only assigned here, not added */
    pmix_event_assign(&pmix_client_globals.myserver.send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver.sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_ptl_base_send_handler, &pmix_client_globals.myserver);
    pmix_client_globals.myserver.send_ev_active = false;

    return PMIX_SUCCESS;
}
示例#25
0
/*
 * Pull the bytes still owed to a receive descriptor off the component socket.
 *
 * recv->rdptr is the next write position and recv->rdbytes the number of
 * bytes still wanted; both are advanced as data arrives.
 *
 * Returns:
 *   OPAL_SUCCESS            - rdbytes reached zero
 *   OPAL_ERR_RESOURCE_BUSY  - read() reported EAGAIN
 *   OPAL_ERR_WOULD_BLOCK    - read() reported EWOULDBLOCK, or the peer
 *                             closed the connection (events are torn down
 *                             and the socket closed in that case)
 *   OPAL_ERR_COMM_FAILURE   - any other read() error
 */
static int read_bytes(pmix_usock_recv_t* recv)
{
    int nread;

    /* keep going until the descriptor is satisfied or something stops us */
    while (recv->rdbytes > 0) {
        nread = read(mca_pmix_native_component.sd, recv->rdptr, recv->rdbytes);

        if (nread > 0) {
            /* got some data - advance the cursor and shrink the remainder */
            recv->rdptr += nread;
            recv->rdbytes -= nread;
            continue;
        }

        if (0 == nread) {
            /* a zero-length read means the remote side hung up */
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s pmix_usock_msg_recv: peer closed connection",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

            /* shut down every event that is still armed */
            if (mca_pmix_native_component.recv_ev_active) {
                opal_event_del(&mca_pmix_native_component.recv_event);
                mca_pmix_native_component.recv_ev_active = false;
            }
            if (mca_pmix_native_component.timer_ev_active) {
                opal_event_del(&mca_pmix_native_component.timer_event);
                mca_pmix_native_component.timer_ev_active = false;
            }
            if (mca_pmix_native_component.send_ev_active) {
                opal_event_del(&mca_pmix_native_component.send_event);
                mca_pmix_native_component.send_ev_active = false;
            }

            /* drop any partially received message */
            if (NULL != mca_pmix_native_component.recv_msg) {
                OBJ_RELEASE(mca_pmix_native_component.recv_msg);
                mca_pmix_native_component.recv_msg = NULL;
            }

            CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
            return OPAL_ERR_WOULD_BLOCK;
        }

        /* nread < 0: classify the error */
        if (EINTR == opal_socket_errno) {
            /* interrupted by a signal - simply retry */
            continue;
        }
        if (EAGAIN == opal_socket_errno) {
            /* socket is busy: the caller keeps this message active while
             * the event library cycles so other messages can progress
             */
            return OPAL_ERR_RESOURCE_BUSY;
        }
        if (EWOULDBLOCK == opal_socket_errno) {
            /* same idea as above, reported with a distinct code */
            return OPAL_ERR_WOULD_BLOCK;
        }

        /* unrecoverable error - the caller must abort this message */
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix_usock_msg_recv: readv failed: %s (%d)",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), 
                            strerror(opal_socket_errno),
                            opal_socket_errno);
        return OPAL_ERR_COMM_FAILURE;
    }

    /* the whole block has been received */
    return OPAL_SUCCESS;
}
/*
 * A blocking recv on a non-blocking socket. Used to receive the small amount of connection
 * information that identifies the peers endpoint.
 *
 * peer  - peer object associated with the socket; may be NULL when the
 *         sender has not been identified yet
 * sd    - socket to read from
 * data  - destination buffer, at least `size` bytes long
 * size  - exact number of bytes to receive
 *
 * Returns true once `size` bytes have been stored in `data`. Returns false
 * if the remote side closed the connection or recv() failed with an error
 * other than EINTR/EAGAIN/EWOULDBLOCK (those three are retried immediately,
 * so this call spins until data arrives).
 *
 * On failure the socket is closed and, when a peer object is available, its
 * state is set to PMIX_SERVER_FAILED - except when the peer is still in
 * PMIX_SERVER_CONNECT_ACK and recv() reported a hard error, in which case
 * the peer and socket are left untouched.
 */
static bool usock_peer_recv_blocking(pmix_server_peer_t* peer,
                                     int sd, void* data, size_t size)
{
    unsigned char* ptr = (unsigned char*)data;
    size_t cnt = 0;

    opal_output_verbose(2, pmix_server_output,
                        "%s waiting for connect ack from %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));

    while (cnt < size) {
        int retval = recv(sd, (char *)ptr+cnt, size-cnt, 0);

        /* remote closed connection */
        if (retval == 0) {
            opal_output_verbose(2, pmix_server_output,
                                "%s-%s usock_peer_recv_blocking: "
                                "peer closed connection: peer state %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
                                (NULL == peer) ? 0 : peer->state);
            /* peer is legitimately NULL for not-yet-identified senders,
             * so only touch it when present; otherwise close the raw
             * descriptor, mirroring the hard-error path below */
            if (NULL != peer) {
                peer->state = PMIX_SERVER_FAILED;
                CLOSE_THE_SOCKET(peer->sd);
            } else {
                CLOSE_THE_SOCKET(sd);
            }
            return false;
        }

        /* socket is non-blocking so handle errors */
        if (retval < 0) {
            if (opal_socket_errno != EINTR && 
                opal_socket_errno != EAGAIN && 
                opal_socket_errno != EWOULDBLOCK) {
                if (NULL != peer && peer->state == PMIX_SERVER_CONNECT_ACK) {
                    /* If we overflow the listen backlog, it's
                       possible that even though we finished the three
                       way handshake, the remote host was unable to
                       transition the connection from half connected
                       (received the initial SYN) to fully connected
                       (in the listen backlog).  We likely won't see
                       the failure until we try to receive, due to
                       timing and the like.  The first thing we'll get
                       in that case is a RST packet, which receive
                       will turn into a connection reset by peer
                       errno.  In that case, leave the socket in
                       CONNECT_ACK and propagate the error up to
                       recv_connect_ack, who will try to establish the
                       connection again */
                    opal_output_verbose(2, pmix_server_output,
                                        "%s connect ack received error %s from %s",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        strerror(opal_socket_errno),
                                        ORTE_NAME_PRINT(&(peer->name)));
                    return false;
                } else {
                    opal_output(0, 
                                "%s usock_peer_recv_blocking: "
                                "recv() failed for %s: %s (%d)\n",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
                                strerror(opal_socket_errno),
                                opal_socket_errno);
                    if (NULL != peer) {
                        peer->state = PMIX_SERVER_FAILED;
                        CLOSE_THE_SOCKET(peer->sd);
                    } else {
                        CLOSE_THE_SOCKET(sd);
                    }
                    return false;
                }
            }
            /* transient condition (EINTR/EAGAIN/EWOULDBLOCK): try again */
            continue;
        }
        cnt += retval;
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s connect ack received from %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
    return true;
}
/*
 *  Receive the peers globally unique process identification from a newly
 *  connected socket and verify the expected response. If so, move the
 *  socket to a connected state.
 *
 *  pr    - peer we expect on this socket, or NULL if the sender is not yet
 *          known (it is then looked up by socket, or created)
 *  sd    - socket to read the connect-ack from
 *  dhdr  - optional; when non-NULL the received header is copied here and
 *          the caller completes the connection itself (this function then
 *          returns before marking the peer connected)
 *
 *  The payload that follows the header holds a NUL-terminated version
 *  string immediately followed by a credential string.
 *
 *  Returns ORTE_SUCCESS when the header, version and credential all check
 *  out; ORTE_ERR_OUT_OF_RESOURCE if the payload buffer cannot be allocated;
 *  ORTE_ERR_UNREACH for every other failure (recv failure, bad handshake
 *  state, wrong header type, unexpected sender, version mismatch, rejected
 *  credential).
 */
int pmix_server_recv_connect_ack(pmix_server_peer_t* pr, int sd,
                                 pmix_server_hdr_t *dhdr)
{
    char *msg;
    char *version;
    int rc;
    size_t nbytes;
    size_t cred_off;
    opal_sec_cred_t creds;
    pmix_server_peer_t *peer;
    pmix_server_hdr_t hdr;
    orte_process_name_t sender;

    opal_output_verbose(2, pmix_server_output,
                        "%s RECV CONNECT ACK FROM %s ON SOCKET %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == pr) ? "UNKNOWN" : ORTE_NAME_PRINT(&pr->name), sd);

    peer = pr;
    /* ensure all is zero'd */
    memset(&hdr, 0, sizeof(pmix_server_hdr_t));

    if (usock_peer_recv_blocking(peer, sd, &hdr, sizeof(pmix_server_hdr_t))) {
        if (NULL != peer) {
            /* If the peer state is CONNECT_ACK, then we were waiting for
             * the connection to be ack'd
             */
            if (peer->state != PMIX_SERVER_CONNECT_ACK) {
                /* handshake broke down - abort this connection */
                opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM %s ON SOCKET %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&peer->name), sd);
                peer->state = PMIX_SERVER_FAILED;
                CLOSE_THE_SOCKET(peer->sd);
                return ORTE_ERR_UNREACH;
            }
        }
    } else {
        /* unable to complete the recv */
        opal_output_verbose(2, pmix_server_output,
                            "%s unable to complete recv of connect-ack from %s ON SOCKET %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name), sd);
        return ORTE_ERR_UNREACH;
    }
    /* if the requestor wanted the header returned, then do so now */
    if (NULL != dhdr) {
        *dhdr = hdr;
    }

    if (hdr.type != PMIX_USOCK_IDENT) {
        opal_output(0, "usock_peer_recv_connect_ack: invalid header type: %d\n", hdr.type);
        if (NULL != peer) {
            peer->state = PMIX_SERVER_FAILED;
            CLOSE_THE_SOCKET(peer->sd);
        } else {
            CLOSE_THE_SOCKET(sd);
        }
        return ORTE_ERR_UNREACH;
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack recvd from %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name));

    sender = hdr.id;
    /* if we don't already have it, get the peer */
    if (NULL == peer) {
        peer = pmix_server_peer_lookup(sd);
        if (NULL == peer) {
            opal_output_verbose(2, pmix_server_output,
                                "%s pmix_server_recv_connect: connection from new peer",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            peer = OBJ_NEW(pmix_server_peer_t);
            peer->name = sender;
            peer->state = PMIX_SERVER_ACCEPTING;
            peer->sd = sd;
            if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(pmix_server_peers, sd, peer)) {
                OBJ_RELEASE(peer);
                CLOSE_THE_SOCKET(sd);
                return ORTE_ERR_UNREACH;
            }
        } else if (PMIX_SERVER_CONNECTED == peer->state ||
                   PMIX_SERVER_CONNECTING == peer->state ||
                   PMIX_SERVER_CONNECT_ACK == peer->state) {
            /* if I already have an established such a connection, then we need
             * to reject this connection */
            /* NOTE(review): this closes peer->sd but processing continues
             * below on `sd` - confirm that is the intended behaviour */
            opal_output_verbose(2, pmix_server_output,
                                "%s EXISTING CONNECTION WITH %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&sender));
            if (peer->recv_ev_active) {
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
            }
            if (peer->send_ev_active) {
                opal_event_del(&peer->send_event);
                peer->send_ev_active = false;
            }
            if (0 < peer->sd) {
                CLOSE_THE_SOCKET(peer->sd);
                peer->sd = -1;
            }
            peer->retries = 0;
        }
    } else {
        /* compare the peers name to the expected value */
        if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->name, &sender)) {
            opal_output(0, "%s usock_peer_recv_connect_ack: "
                        "received unexpected process identifier %s from %s\n",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&sender),
                        ORTE_NAME_PRINT(&(peer->name)));
            peer->state = PMIX_SERVER_FAILED;
            CLOSE_THE_SOCKET(peer->sd);
            return ORTE_ERR_UNREACH;
        }
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack header from %s is okay",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    /* get the authentication and version payload. The length comes off the
     * wire, so allocate one extra zeroed byte: whatever the sender put in
     * the payload, the string functions below always find a terminator
     * inside our buffer. The overflow test keeps nbytes + 1 from wrapping. */
    nbytes = (size_t)hdr.nbytes;
    msg = (nbytes + 1 > nbytes) ? (char*)calloc(1, nbytes + 1) : NULL;
    if (NULL == msg) {
        peer->state = PMIX_SERVER_FAILED;
        CLOSE_THE_SOCKET(peer->sd);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    
    if (!usock_peer_recv_blocking(peer, sd, msg, hdr.nbytes)) {
        /* unable to complete the recv */
        opal_output_verbose(2, pmix_server_output,
                            "%s unable to complete recv of connect-ack from %s ON SOCKET %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&peer->name), peer->sd);
        free(msg);
        return ORTE_ERR_UNREACH;
    }

    /* check that this is from a matching version */
    version = (char*)(msg);
    if (0 != strcmp(version, opal_version_string)) {
        opal_output(0, "%s usock_peer_recv_connect_ack: "
                    "received different version from %s: %s instead of %s\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    version, opal_version_string);
        peer->state = PMIX_SERVER_FAILED;
        CLOSE_THE_SOCKET(peer->sd);
        free(msg);
        return ORTE_ERR_UNREACH;
    }

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack version from %s matches ours",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    /* check security token. The credential starts right after the version
     * string's terminator; if the payload ended with the version string,
     * point at our own trailing NUL so the credential is an empty string
     * rather than memory past the buffer. */
    cred_off = strlen(version) + 1;
    if (cred_off > nbytes) {
        cred_off = nbytes;
    }
    creds.credential = (char*)(msg + cred_off);
    creds.size = strlen(creds.credential);
    if (OPAL_SUCCESS != (rc = opal_sec.authenticate(&creds))) {
        /* a rejected credential must terminate the handshake - do not
         * fall through and treat the peer as authenticated */
        ORTE_ERROR_LOG(rc);
        peer->state = PMIX_SERVER_FAILED;
        CLOSE_THE_SOCKET(peer->sd);
        free(msg);
        return ORTE_ERR_UNREACH;
    }
    free(msg);

    opal_output_verbose(2, pmix_server_output,
                        "%s connect-ack %s authenticated",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    /* if the requestor wanted the header returned, then they
     * will complete their processing
     */
    if (NULL != dhdr) {
        return ORTE_SUCCESS;
    }

    /* connected */
    pmix_server_peer_connected(peer);
    if (2 <= opal_output_get_verbosity(pmix_server_output)) {
        pmix_server_peer_dump(peer, "connected");
    }
    return ORTE_SUCCESS;
}
示例#28
0
/*
 * Finalize the native PMIx client component.
 *
 * Calls are counted against init_cntr: every call except the one that finds
 * the counter at exactly 1 just decrements it and returns. The final call
 * notifies the server (if we are connected), waits for its acknowledgement,
 * stops the progress thread and closes the socket.
 *
 * Returns OPAL_SUCCESS, or the packing error if the finalize command could
 * not be packed.
 */
static int native_fini(void)
{
    opal_buffer_t *msg;
    pmix_cb_t *cb;
    pmix_cmd_t cmd = PMIX_FINALIZE_CMD;
    int rc;

    /* not the last user yet - just drop one reference */
    if (init_cntr != 1) {
        init_cntr--;
        return OPAL_SUCCESS;
    }
    init_cntr = 0;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native finalize called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* without a URI nothing was ever set up, so there is nothing to undo */
    if (NULL == mca_pmix_native_component.uri) {
        return OPAL_SUCCESS;
    }

    if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
        /* build the command message telling the PMIx server that
         * we are terminating normally */
        msg = OBJ_NEW(opal_buffer_t);
        rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T);
        if (OPAL_SUCCESS != rc) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }

        /* the callback object is handed to the recv routine so it
         * knows which callback to fire when the reply comes back */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;

        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix:native sending finalize sync to server",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

        /* queue the message on our event base for delivery to the server */
        PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

        /* block here until the server acknowledges */
        PMIX_WAIT_FOR_COMPLETION(cb->active);
        OBJ_RELEASE(cb);
    }

    /* shut down the async progress thread, if one is running */
    if (NULL != mca_pmix_native_component.evbase) {
        opal_stop_progress_thread("opal_async", true);
        mca_pmix_native_component.evbase = NULL;
    }

    /* finally release the socket */
    if (mca_pmix_native_component.sd >= 0) {
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
    }

    return OPAL_SUCCESS;
}
示例#29
0
/*
 * Probe a remote process over TCP to see whether it is reachable.
 *
 * Connects to the address encoded in `uri`, sends a header of type
 * MCA_OOB_TCP_PROBE addressed to `name`, and waits for a probe header to
 * come back. The socket is always closed before returning.
 *
 * name     - process name placed in the probe's destination field
 * uri      - contact string, parsed by mca_oob_tcp_parse_uri()
 * timeout  - upper bound applied separately to the connect wait and to the
 *            response wait; must not be NULL (it is dereferenced)
 *
 * Returns ORTE_SUCCESS if a probe reply was received, the parse error if
 * the URI is invalid, and ORTE_ERR_UNREACH for every other failure.
 */
int mca_oob_tcp_ping(
    const orte_process_name_t* name,
    const char* uri,
    const struct timeval *timeout)
{
    int sd, flags, rc;
    struct sockaddr_in inaddr;
    fd_set fdset;
    mca_oob_tcp_hdr_t hdr;
    struct timeval tv;
    struct iovec iov;
#ifndef __WINDOWS__
    struct opal_event sigpipe_handler;
#endif

    /* parse uri string */
    if(ORTE_SUCCESS != (rc = mca_oob_tcp_parse_uri(uri, &inaddr))) {
        opal_output(0,
            "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: invalid uri: %s\n",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(name),
            uri);
        return rc;
    }

    /* create socket */
    sd = socket(AF_INET, SOCK_STREAM, 0);
    if (sd < 0) {
        opal_output(0,
            "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: socket() failed: %s (%d)\n",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(name),
            strerror(opal_socket_errno),
            opal_socket_errno);
        return ORTE_ERR_UNREACH;
    }

    /* setup the socket as non-blocking; on failure we carry on with a
     * blocking socket and `flags` stays negative to record that */
    if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_GETFL) failed: %s (%d)\n", 
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(name),
            strerror(opal_socket_errno),
            opal_socket_errno);
    } else {
        flags |= O_NONBLOCK;
        if(fcntl(sd, F_SETFL, flags) < 0) {
            opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
                ORTE_NAME_ARGS(orte_process_info.my_name),
                ORTE_NAME_ARGS(name),
                strerror(opal_socket_errno),
                opal_socket_errno);
        }
    }

    /* start the connect - will likely fail with EINPROGRESS */
    FD_ZERO(&fdset);
    if(connect(sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
        /* connect failed? */
        if(opal_socket_errno != EINPROGRESS && opal_socket_errno != EWOULDBLOCK) {
            CLOSE_THE_SOCKET(sd);
            return ORTE_ERR_UNREACH;
        }

        /* select with timeout to wait for connect to complete */
        FD_SET(sd, &fdset);
        tv = *timeout;
        rc = select(sd+1, NULL, &fdset, NULL, &tv);
        if(rc <= 0) {
            CLOSE_THE_SOCKET(sd);
            return ORTE_ERR_UNREACH;
        }
    }

    /* set socket back to blocking - but only if F_GETFL gave us a valid
     * flag word; a negative value here is the error return, and writing
     * it back through F_SETFL would set arbitrary status flags */
    if (flags >= 0) {
        flags &= ~O_NONBLOCK;
        if(fcntl(sd, F_SETFL, flags) < 0) {
            opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
                ORTE_NAME_ARGS(orte_process_info.my_name),
                ORTE_NAME_ARGS(name),
                strerror(opal_socket_errno),
                opal_socket_errno);
        }
    }

    /* send a probe message */
    memset(&hdr, 0, sizeof(hdr));
    if(orte_process_info.my_name != NULL) {
        hdr.msg_src = *orte_process_info.my_name;
    } else {
        hdr.msg_src = *ORTE_NAME_INVALID;
    }
    hdr.msg_dst = *name;
    hdr.msg_type = MCA_OOB_TCP_PROBE;
    MCA_OOB_TCP_HDR_HTON(&hdr);

#ifndef __WINDOWS__
    /* Ignore SIGPIPE in the write -- determine success or failure in
       the ping by looking at the return code from write() */
    opal_signal_set(&sigpipe_handler, SIGPIPE,
                    noop, &sigpipe_handler);
    opal_signal_add(&sigpipe_handler, NULL);
#endif
    /* Do the write and see what happens. Use the writev version just to
     * make Windows happy as there the write function is limited to
     * file operations.
     */
    iov.iov_base = (IOVBASE_TYPE*)&hdr;
    iov.iov_len  = sizeof(hdr);
    rc = writev(sd, &iov, 1 );
#ifndef __WINDOWS__
    /* Now de-register the handler */
    opal_signal_del(&sigpipe_handler);
#endif
    if (rc != sizeof(hdr)) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }

    /* select with timeout to wait for response */
    FD_SET(sd, &fdset);
    tv = *timeout;
    rc = select(sd+1, &fdset, NULL, NULL, &tv);
    if(rc <= 0) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }
    /* anything other than one complete header counts as a failed ping */
    if((rc = read(sd, &hdr, sizeof(hdr))) != sizeof(hdr)) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }
    MCA_OOB_TCP_HDR_NTOH(&hdr);
    if(hdr.msg_type != MCA_OOB_TCP_PROBE) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }
    CLOSE_THE_SOCKET(sd);
    return ORTE_SUCCESS;
}
示例#30
0
/*
 * Establish the TCP connection to our PMIx server and arm the events that
 * service it.
 *
 * Where the server's contact info comes from depends on what we are:
 *   - client: the PMIX_SERVER_URI21 / PMIX_SERVER_URI2 environment
 *     variable, formatted "<nspace>.<rank>;<rendezvous URI>"
 *   - tool, tried in this order:
 *       1. a URI held in mca_ptl_tcp_component.super.uri (possibly set by a
 *          PMIX_SERVER_URI directive); a "file:" prefix names a file to read
 *       2. the session file of the server pid given via PMIX_SERVER_PIDINFO
 *       3. the system-level server file, if PMIX_CONNECT_TO_SYSTEM or
 *          PMIX_CONNECT_SYSTEM_FIRST was requested
 *       4. a search for any session-level server file on this host
 *
 * peer   - not referenced by this implementation
 * info   - optional array of directives (may be NULL)
 * ninfo  - number of entries in info
 *
 * Returns PMIX_SUCCESS once connected and the recv event is active;
 * otherwise an error status such as PMIX_ERR_UNREACH, PMIX_ERR_BAD_PARAM,
 * PMIX_ERR_NOT_SUPPORTED, PMIX_ERR_INIT or PMIX_ERR_NOMEM.
 */
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    char *evar, **uri, *suri;
    char *filename, *nspace=NULL;
    pmix_rank_t rank = PMIX_RANK_WILDCARD;
    char *p, *p2;
    int sd, rc;
    size_t n;
    char myhost[PMIX_MAXHOSTNAMELEN];
    bool system_level = false;
    bool system_level_only = false;
    pid_t pid = 0;

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "ptl:tcp: connecting to server");

    /* see if the connection info is in the info array - if
     * so, then that overrides all other options */


    /* if I am a client, then we need to look for the appropriate
     * connection info in the environment */
    if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
        if (NULL != (evar = getenv("PMIX_SERVER_URI21"))) {
            /* we are talking to a v2.1 server */
            pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "V21 SERVER DETECTED");
            /* must use the v21 bfrops module */
            pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v21");
            if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
                return PMIX_ERR_INIT;
            }
        } else if (NULL != (evar = getenv("PMIX_SERVER_URI2"))) {
            /* we are talking to a v2.0 server */
            pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "V20 SERVER DETECTED");
            /* must use the v20 bfrops module */
            pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v20");
            if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
                return PMIX_ERR_INIT;
            }
        } else {
            /* not us */
            return PMIX_ERR_NOT_SUPPORTED;
        }
        /* the server will be using the same bfrops as us */
        pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops;
        /* mark that we are using the V2 protocol */
        pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;

        /* the URI consists of the following elements:
        *    - server nspace.rank
        *    - ptl rendezvous URI
        */
        uri = pmix_argv_split(evar, ';');
        if (2 != pmix_argv_count(uri)) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* set the server nspace */
        p = uri[0];
        if (NULL == (p2 = strchr(p, '.'))) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }
        *p2 = '\0';
        ++p2;
        nspace = strdup(p);
        rank = strtoull(p2, NULL, 10);

        /* save the URI, but do not overwrite what we may have received from
         * the info-key directives */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            mca_ptl_tcp_component.super.uri = strdup(uri[1]);
        }
        pmix_argv_free(uri);

        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:client attempt connect to %s",
                            mca_ptl_tcp_component.super.uri);

        /* go ahead and try to connect */
        if (PMIX_SUCCESS != (rc = try_connect(&sd))) {
            free(nspace);
            return rc;
        }
        goto complete;

    }

    /* get here if we are a tool - check any provided directives
     * to see where they want us to connect to */
    if (NULL != info) {
        for (n=0; n < ninfo; n++) {
            if (0 == strcmp(info[n].key, PMIX_CONNECT_TO_SYSTEM)) {
                system_level_only = PMIX_INFO_TRUE(&info[n]);
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_SYSTEM_FIRST)) {
                /* try the system-level */
                system_level = PMIX_INFO_TRUE(&info[n]);
            } else if (0 == strcmp(info[n].key, PMIX_SERVER_PIDINFO)) {
                pid = info[n].value.data.pid;
            } else if (0 == strcmp(info[n].key, PMIX_SERVER_URI)) {
                /* the directive replaces any URI we already hold, so
                 * release the old string first (free(NULL) is a no-op) */
                free(mca_ptl_tcp_component.super.uri);
                mca_ptl_tcp_component.super.uri = strdup(info[n].value.data.string);
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_RETRY_DELAY)) {
                mca_ptl_tcp_component.wait_to_connect = info[n].value.data.uint32;
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_MAX_RETRIES)) {
                mca_ptl_tcp_component.max_retries = info[n].value.data.uint32;
            }
        }
    }
    /* mark that we are using the V2 protocol */
    pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;
    gethostname(myhost, sizeof(myhost));
    /* if we were given a URI via MCA param, then look no further */
    if (NULL != mca_ptl_tcp_component.super.uri) {
        /* if the string starts with "file:", then they are pointing
         * us to a file we need to read to get the URI itself */
        if (0 == strncmp(mca_ptl_tcp_component.super.uri, "file:", 5)) {
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "ptl:tcp:tool getting connection info from %s",
                                mca_ptl_tcp_component.super.uri);
            nspace = NULL;
            rc = parse_uri_file(&mca_ptl_tcp_component.super.uri[5], &suri, &nspace, &rank);
            if (PMIX_SUCCESS != rc) {
                return PMIX_ERR_UNREACH;
            }
            free(mca_ptl_tcp_component.super.uri);
            mca_ptl_tcp_component.super.uri = suri;
        } else {
            /* we need to extract the nspace/rank of the server from the string */
            p = strchr(mca_ptl_tcp_component.super.uri, ';');
            if (NULL == p) {
                return PMIX_ERR_BAD_PARAM;
            }
            *p = '\0';
            p++;
            suri = strdup(p); // save the uri portion
            /* the '.' in the first part of the original string separates
             * nspace from rank */
            p = strchr(mca_ptl_tcp_component.super.uri, '.');
            if (NULL == p) {
                free(suri);
                return PMIX_ERR_BAD_PARAM;
            }
            *p = '\0';
            p++;
            nspace = strdup(mca_ptl_tcp_component.super.uri);
            rank = strtoull(p, NULL, 10);
            /* now update the URI */
            free(mca_ptl_tcp_component.super.uri);
            mca_ptl_tcp_component.super.uri = suri;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool attempt connect using given URI %s",
                            mca_ptl_tcp_component.super.uri);
        /* go ahead and try to connect */
        if (PMIX_SUCCESS != (rc = try_connect(&sd))) {
            if (NULL != nspace) {
                free(nspace);
            }
            return rc;
        }
        goto complete;
    }

    /* if they gave us a pid, then look for it */
    if (0 != pid) {
        if (0 > asprintf(&filename, "pmix.%s.tool.%d", myhost, pid)) {
            return PMIX_ERR_NOMEM;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool searching for given session server %s",
                            filename);
        nspace = NULL;
        rc = df_search(mca_ptl_tcp_component.system_tmpdir,
                       filename, &sd, &nspace, &rank);
        free(filename);
        if (PMIX_SUCCESS == rc) {
            goto complete;
        }
        if (NULL != nspace) {
            free(nspace);
        }
        /* since they gave us a specific pid and we couldn't
         * connect to it, return an error */
        return PMIX_ERR_UNREACH;
    }


    /* if they asked for system-level, we start there */
    if (system_level || system_level_only) {
        if (0 > asprintf(&filename, "%s/pmix.sys.%s", mca_ptl_tcp_component.system_tmpdir, myhost)) {
            return PMIX_ERR_NOMEM;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool looking for system server at %s",
                            filename);
        /* try to read the file */
        rc = parse_uri_file(filename, &suri, &nspace, &rank);
        free(filename);
        if (PMIX_SUCCESS == rc) {
            mca_ptl_tcp_component.super.uri = suri;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "ptl:tcp:tool attempt connect to system server at %s",
                                mca_ptl_tcp_component.super.uri);
            /* go ahead and try to connect */
            if (PMIX_SUCCESS == try_connect(&sd)) {
                goto complete;
            }
            free(nspace);
        }
    }

    /* we get here if they either didn't ask for a system-level connection,
     * or they asked for it and it didn't succeed. If they _only_ wanted
     * a system-level connection, then we are done */
    if (system_level_only) {
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp: connecting to system failed");
        return PMIX_ERR_UNREACH;
    }

    /* they didn't give us a pid, so we will search to see what session-level
     * tools are available to this user. We will take the first connection
     * that succeeds - this is based on the likelihood that there is only
     * one session per user on a node */

    if (0 > asprintf(&filename, "pmix.%s.tool", myhost)) {
        return PMIX_ERR_NOMEM;
    }
    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "ptl:tcp:tool searching for session server %s",
                        filename);
    nspace = NULL;
    rc = df_search(mca_ptl_tcp_component.system_tmpdir,
                   filename, &sd, &nspace, &rank);
    free(filename);
    if (PMIX_SUCCESS != rc) {
        if (NULL != nspace){
            free(nspace);
        }
        return PMIX_ERR_UNREACH;
    }

  complete:
    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* do a final bozo check */
    if (NULL == nspace || PMIX_RANK_WILDCARD == rank) {
        if (NULL != nspace) {
            free(nspace);
        }
        CLOSE_THE_SOCKET(sd);
        return PMIX_ERR_UNREACH;
    }
    /* mark the connection as made */
    pmix_globals.connected = true;
    pmix_client_globals.myserver->sd = sd;

    /* setup the server info; ownership of nspace passes to the server's
     * nspace object unless it already has a name, in which case our copy
     * is released */
    if (NULL == pmix_client_globals.myserver->info) {
        pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr) {
        pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr->nspace) {
        pmix_client_globals.myserver->nptr->nspace = nspace;
    } else {
        free(nspace);
    }
    if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_client_globals.myserver->info->pname.nspace = strdup(pmix_client_globals.myserver->nptr->nspace);
    }
    pmix_client_globals.myserver->info->pname.rank = rank;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_ptl_base_recv_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event; it is assigned here but not added, and is marked
     * inactive */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_ptl_base_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}