Example #1
0
static void pmix_usock_send(int sd, short args, void *cbdata)
{
    pmix_ptl_queue_t *queue = (pmix_ptl_queue_t*)cbdata;
    pmix_ptl_send_t *snd;

    /* acquire the object */
    PMIX_ACQUIRE_OBJECT(queue);

    if (NULL == queue->peer || queue->peer->sd < 0 ||
        NULL == queue->peer->info || NULL == queue->peer->nptr) {
        /* this peer has lost connection */
        PMIX_RELEASE(queue);
        /* ensure we post the object before another thread
         * picks it back up */
        PMIX_POST_OBJECT(queue);
        return;
    }

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "[%s:%d] send to %s:%u on tag %d",
                        __FILE__, __LINE__,
                        (queue->peer)->info->pname.nspace,
                        (queue->peer)->info->pname.rank, (queue->tag));

    snd = PMIX_NEW(pmix_ptl_send_t);
    snd->hdr.pindex = htonl(pmix_globals.pindex);
    snd->hdr.tag = htonl(queue->tag);
    snd->hdr.nbytes = htonl((queue->buf)->bytes_used);
    snd->data = (queue->buf);
    /* always start with the header */
    snd->sdptr = (char*)&snd->hdr;
    snd->sdbytes = sizeof(pmix_ptl_hdr_t);

    /* if there is no message on-deck, put this one there */
    if (NULL == (queue->peer)->send_msg) {
        (queue->peer)->send_msg = snd;
    } else {
        /* add it to the queue */
        pmix_list_append(&(queue->peer)->send_queue, &snd->super);
    }
    /* ensure the send event is active */
    if (!(queue->peer)->send_ev_active) {
        (queue->peer)->send_ev_active = true;
        PMIX_POST_OBJECT(queue->peer);
        pmix_event_add(&(queue->peer)->send_event, 0);
    }
    PMIX_RELEASE(queue);
    PMIX_POST_OBJECT(snd);
}
Example #2
0
static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t ndata,
                          void *cbdata)
{
    pmix_cb_t *cb = (pmix_cb_t*)cbdata;
    pmix_pdata_t *tgt = (pmix_pdata_t*)cb->cbdata;
    size_t i, j;

    PMIX_ACQUIRE_OBJECT(cb);
    cb->status = status;
    if (PMIX_SUCCESS == status) {
        /* find the matching key in the provided info array - error if not found */
        for (i=0; i < ndata; i++) {
            for (j=0; j < cb->nvals; j++) {
                if (0 == strcmp(pdata[i].key, tgt[j].key)) {
                    /* transfer the publishing proc id */
                    (void)strncpy(tgt[j].proc.nspace, pdata[i].proc.nspace, PMIX_MAX_NSLEN);
                    tgt[j].proc.rank = pdata[i].proc.rank;
                    /* transfer the value to the pmix_info_t */
                    PMIX_BFROPS_VALUE_XFER(cb->status, pmix_client_globals.myserver, &tgt[j].value, &pdata[i].value);
                    break;
                }
            }
        }
    }
    PMIX_POST_OBJECT(cb);
    PMIX_WAKEUP_THREAD(&cb->lock);
}
Example #3
0
static void op_cbfunc(pmix_status_t status, void *cbdata)
{
    pmix_cb_t *cb = (pmix_cb_t*)cbdata;

    cb->status = status;
    PMIX_POST_OBJECT(cb);
    PMIX_WAKEUP_THREAD(&cb->lock);
}
Example #4
0
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    struct sockaddr_un *address;
    char *evar, **uri;
    pmix_status_t rc;
    int sd;
    pmix_socklen_t len;
    bool retried = false;

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "[%s:%d] connect to server",
                        __FILE__, __LINE__);

    /* if we are not a client, there is nothing we can do */
    if (!PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* if we don't have a path to the daemon rendezvous point,
     * then we need to return an error */
    if (NULL != (evar = getenv("PMIX_SERVER_URI2USOCK"))) {
        /* this is a v2.1+ server */
        pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v21");
        if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
            return PMIX_ERR_INIT;
        }
    } else if (NULL != (evar = getenv("PMIX_SERVER_URI"))) {
        /* this is a pre-v2.1 server - must use the v12 bfrops module */
        pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v12");
        if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
            return PMIX_ERR_INIT;
        }
    } else {
        /* let the caller know that the server isn't available */
        return PMIX_ERR_SERVER_NOT_AVAIL;
    }
    /* the server will be using the same bfrops as us */
    pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops;
    /* mark that we are using the V1 protocol */
    pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V1;

    uri = pmix_argv_split(evar, ':');
    if (3 != pmix_argv_count(uri)) {
        pmix_argv_free(uri);
        PMIX_ERROR_LOG(PMIX_ERROR);
        return PMIX_ERROR;
    }
    /* set the server nspace */
    if (NULL == pmix_client_globals.myserver->info) {
        pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr) {
        pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr->nspace) {
        pmix_client_globals.myserver->nptr->nspace = strdup(uri[0]);
    }
    if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_client_globals.myserver->info->pname.nspace = strdup(uri[0]);
    }

    /* set the server rank */
    pmix_client_globals.myserver->info->pname.rank = strtoull(uri[1], NULL, 10);

    /* setup the path to the daemon rendezvous point */
    memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage));
    address = (struct sockaddr_un*)&mca_ptl_usock_component.connection;
    address->sun_family = AF_UNIX;
    snprintf(address->sun_path, sizeof(address->sun_path)-1, "%s", uri[2]);
    /* if the rendezvous file doesn't exist, that's an error */
    if (0 != access(uri[2], R_OK)) {
        pmix_argv_free(uri);
        PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND);
        return PMIX_ERR_NOT_FOUND;
    }
    pmix_argv_free(uri);

  retry:
    /* establish the connection */
    len = sizeof(struct sockaddr_un);
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_usock_component.connection, len, &sd))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver->sd = sd;

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        if (PMIX_ERR_TEMP_UNAVAILABLE == rc) {
            /* give it two tries */
            if (!retried) {
                retried = true;
                goto retry;
            }
        }
        return rc;
    }

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_usock_recv_handler, pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_usock_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}
Example #5
0
void pmix_usock_send_recv(int fd, short args, void *cbdata)
{
    pmix_ptl_sr_t *ms = (pmix_ptl_sr_t*)cbdata;
    pmix_ptl_posted_recv_t *req;
    pmix_ptl_send_t *snd;
    uint32_t tag;

    /* acquire the object */
    PMIX_ACQUIRE_OBJECT(ms);

    if (ms->peer->sd < 0) {
        /* this peer's socket has been closed */
        PMIX_RELEASE(ms);
        /* ensure we post the object before another thread
         * picks it back up */
        PMIX_POST_OBJECT(NULL);
        return;
    }

    /* take the next tag in the sequence */
    pmix_ptl_globals.current_tag++;
    if (UINT32_MAX == pmix_ptl_globals.current_tag ) {
        pmix_ptl_globals.current_tag = PMIX_PTL_TAG_DYNAMIC;
    }
    tag = pmix_ptl_globals.current_tag;

    if (NULL != ms->cbfunc) {
        /* if a callback msg is expected, setup a recv for it */
        req = PMIX_NEW(pmix_ptl_posted_recv_t);
        req->tag = tag;
        req->cbfunc = ms->cbfunc;
        req->cbdata = ms->cbdata;
        pmix_output_verbose(5, pmix_ptl_base_framework.framework_output,
                            "posting recv on tag %d", req->tag);
        /* add it to the list of recvs - we cannot have unexpected messages
         * in this subsystem as the server never sends us something that
         * we didn't previously request */
        pmix_list_prepend(&pmix_ptl_globals.posted_recvs, &req->super);
    }

    snd = PMIX_NEW(pmix_ptl_send_t);
    snd->hdr.pindex = pmix_globals.pindex;
    snd->hdr.tag = tag;
    snd->hdr.nbytes = ms->bfr->bytes_used;
    snd->data = ms->bfr;
    /* always start with the header */
    snd->sdptr = (char*)&snd->hdr;
    snd->sdbytes = sizeof(pmix_usock_hdr_t);

    /* if there is no message on-deck, put this one there */
    if (NULL == ms->peer->send_msg) {
        ms->peer->send_msg = snd;
    } else {
        /* add it to the queue */
        pmix_list_append(&ms->peer->send_queue, &snd->super);
    }
    /* ensure the send event is active */
    if (!ms->peer->send_ev_active) {
        ms->peer->send_ev_active = true;
        PMIX_POST_OBJECT(snd);
        pmix_event_add(&ms->peer->send_event, 0);
    }
    /* cleanup */
    PMIX_RELEASE(ms);
    PMIX_POST_OBJECT(snd);
}
Example #6
0
void pmix_usock_recv_handler(int sd, short flags, void *cbdata)
{
    pmix_status_t rc;
    pmix_peer_t *peer = (pmix_peer_t*)cbdata;
    pmix_ptl_recv_t *msg = NULL;

    /* acquire the object */
    PMIX_ACQUIRE_OBJECT(peer);

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "usock:recv:handler called with peer %s:%d",
                        (NULL == peer) ? "NULL" : peer->info->pname.nspace,
                        (NULL == peer) ? PMIX_RANK_UNDEF : peer->info->pname.rank);

    if (NULL == peer) {
        return;
    }
    /* allocate a new message and setup for recv */
    if (NULL == peer->recv_msg) {
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "usock:recv:handler allocate new recv msg");
        peer->recv_msg = PMIX_NEW(pmix_ptl_recv_t);
        if (NULL == peer->recv_msg) {
            pmix_output(0, "usock_recv_handler: unable to allocate recv message\n");
            goto err_close;
        }
        PMIX_RETAIN(peer);
        peer->recv_msg->peer = peer;  // provide a handle back to the peer object
        /* start by reading the header */
        peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
        peer->recv_msg->rdbytes = sizeof(pmix_usock_hdr_t);
    }
    msg = peer->recv_msg;
    msg->sd = sd;
    /* if the header hasn't been completely read, read it */
    if (!msg->hdr_recvd) {
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "usock:recv:handler read hdr on socket %d", peer->sd);
        if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) {
            /* completed reading the header */
            peer->recv_msg->hdr_recvd = true;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "RECVD MSG FOR TAG %d SIZE %d",
                                (int)peer->recv_msg->hdr.tag,
                                (int)peer->recv_msg->hdr.nbytes);
            /* if this is a zero-byte message, then we are done */
            if (0 == peer->recv_msg->hdr.nbytes) {
                pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                    "RECVD ZERO-BYTE MESSAGE FROM %s:%d for tag %d",
                                    peer->info->pname.nspace, peer->info->pname.rank,
                                    peer->recv_msg->hdr.tag);
                peer->recv_msg->data = NULL;  // make sure
                peer->recv_msg->rdptr = NULL;
                peer->recv_msg->rdbytes = 0;
                /* post it for delivery */
                PMIX_ACTIVATE_POST_MSG(peer->recv_msg);
                peer->recv_msg = NULL;
                PMIX_POST_OBJECT(peer);
                return;
            } else {
                pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                    "usock:recv:handler allocate data region of size %lu",
                                    (unsigned long)peer->recv_msg->hdr.nbytes);
                /* allocate the data region */
                peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
                memset(peer->recv_msg->data, 0, peer->recv_msg->hdr.nbytes);
                /* point to it */
                peer->recv_msg->rdptr = peer->recv_msg->data;
                peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
            }
            /* fall thru and attempt to read the data */
        } else if (PMIX_ERR_RESOURCE_BUSY == rc ||
                   PMIX_ERR_WOULD_BLOCK == rc) {
            /* exit this event and let the event lib progress */
            return;
        } else {
            /* the remote peer closed the connection - report that condition
             * and let the caller know
             */
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "pmix_usock_msg_recv: peer closed connection");
            goto err_close;
        }
    }

    if (peer->recv_msg->hdr_recvd) {
        /* continue to read the data block - we start from
         * wherever we left off, which could be at the
         * beginning or somewhere in the message
         */
        if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) {
            /* we recvd all of the message */
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d",
                                (int)peer->recv_msg->hdr.nbytes,
                                peer->recv_msg->hdr.tag, peer->sd);
            /* post it for delivery */
            PMIX_ACTIVATE_POST_MSG(peer->recv_msg);
            peer->recv_msg = NULL;
            /* ensure we post the modified peer object before another thread
             * picks it back up */
            PMIX_POST_OBJECT(peer);
            return;
        } else if (PMIX_ERR_RESOURCE_BUSY == rc ||
                   PMIX_ERR_WOULD_BLOCK == rc) {
            /* exit this event and let the event lib progress */
            /* ensure we post the modified peer object before another thread
             * picks it back up */
            PMIX_POST_OBJECT(peer);
            return;
        } else {
            /* the remote peer closed the connection - report that condition
             * and let the caller know
             */
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "pmix_usock_msg_recv: peer closed connection");
            goto err_close;
        }
    }
    /* success */
    return;

  err_close:
    /* stop all events */
    if (peer->recv_ev_active) {
        pmix_event_del(&peer->recv_event);
        peer->recv_ev_active = false;
    }
    if (peer->send_ev_active) {
        pmix_event_del(&peer->send_event);
        peer->send_ev_active = false;
    }
    if (NULL != peer->recv_msg) {
        PMIX_RELEASE(peer->recv_msg);
        peer->recv_msg = NULL;
    }
    pmix_ptl_base_lost_connection(peer, PMIX_ERR_UNREACH);
    /* ensure we post the modified peer object before another thread
     * picks it back up */
    PMIX_POST_OBJECT(peer);
}
Example #7
0
/*
 * A file descriptor is available/ready for send. Check the state
 * of the socket and take the appropriate action.
 */
void pmix_usock_send_handler(int sd, short flags, void *cbdata)
{
    pmix_peer_t *peer = (pmix_peer_t*)cbdata;
    pmix_ptl_send_t *msg = peer->send_msg;
    pmix_status_t rc;
    uint32_t nbytes;

    /* acquire the object */
    PMIX_ACQUIRE_OBJECT(peer);

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "%s:%d usock:send_handler SENDING TO PEER %s:%d tag %u with %s msg",
                        pmix_globals.myid.nspace, pmix_globals.myid.rank,
                        peer->info->pname.nspace, peer->info->pname.rank,
                        (NULL == msg) ? UINT_MAX : msg->hdr.tag,
                        (NULL == msg) ? "NULL" : "NON-NULL");

    if (NULL != msg) {
        if (!msg->hdr_sent) {
            if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
            /* we have to convert the header back to host-byte order */
                msg->hdr.pindex = ntohl(msg->hdr.pindex);
                msg->hdr.tag = ntohl(msg->hdr.tag);
                nbytes = msg->hdr.nbytes;
                msg->hdr.nbytes = ntohl(nbytes);
            }
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "usock:send_handler SENDING HEADER WITH MSG IDX %d TAG %d SIZE %lu",
                                msg->hdr.pindex, msg->hdr.tag, (unsigned long)msg->hdr.nbytes);
            if (PMIX_SUCCESS == (rc = send_bytes(peer->sd, &msg->sdptr, &msg->sdbytes))) {
                /* header is completely sent */
                pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                    "usock:send_handler HEADER SENT");
                msg->hdr_sent = true;
                /* setup to send the data */
                if (NULL == msg->data) {
                    /* this was a zero-byte msg - nothing more to do */
                    PMIX_RELEASE(msg);
                    peer->send_msg = NULL;
                    goto next;
                } else {
                    /* send the data as a single block */
                    msg->sdptr = msg->data->base_ptr;
                    msg->sdbytes = msg->hdr.nbytes;
                }
                /* fall thru and let the send progress */
            } else if (PMIX_ERR_RESOURCE_BUSY == rc ||
                       PMIX_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                    "usock:send_handler RES BUSY OR WOULD BLOCK");
                if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
                    /* have to convert back again so we are correct when we re-enter */
                    msg->hdr.pindex = htonl(msg->hdr.pindex);
                    msg->hdr.tag = htonl(msg->hdr.tag);
                    nbytes = msg->hdr.nbytes;
                    msg->hdr.nbytes = htonl(nbytes);
                }
                /* ensure we post the modified peer object before another thread
                 * picks it back up */
                PMIX_POST_OBJECT(peer);
                return;
            } else {
                // report the error
                event_del(&peer->send_event);
                peer->send_ev_active = false;
                PMIX_RELEASE(msg);
                peer->send_msg = NULL;
                pmix_ptl_base_lost_connection(peer, rc);
                /* ensure we post the modified peer object before another thread
                 * picks it back up */
                PMIX_POST_OBJECT(peer);
                return;
            }
        }

        if (msg->hdr_sent) {
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "usock:send_handler SENDING BODY OF MSG");
            if (PMIX_SUCCESS == (rc = send_bytes(peer->sd, &msg->sdptr, &msg->sdbytes))) {
                // message is complete
                pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                    "usock:send_handler BODY SENT");
                PMIX_RELEASE(msg);
                peer->send_msg = NULL;
            } else if (PMIX_ERR_RESOURCE_BUSY == rc ||
                       PMIX_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                    "usock:send_handler RES BUSY OR WOULD BLOCK");
                /* ensure we post the modified peer object before another thread
                 * picks it back up */
                PMIX_POST_OBJECT(peer);
                return;
            } else {
                // report the error
                pmix_output(0, "pmix_usock_peer_send_handler: unable to send message ON SOCKET %d",
                            peer->sd);
                pmix_event_del(&peer->send_event);
                peer->send_ev_active = false;
                PMIX_RELEASE(msg);
                peer->send_msg = NULL;
                pmix_ptl_base_lost_connection(peer, rc);
                /* ensure we post the modified peer object before another thread
                 * picks it back up */
                PMIX_POST_OBJECT(peer);
                return;
            }
        }

    next:
        /* if current message completed - progress any pending sends by
         * moving the next in the queue into the "on-deck" position. Note
         * that this doesn't mean we send the message right now - we will
         * wait for another send_event to fire before doing so. This gives
         * us a chance to service any pending recvs.
         */
        peer->send_msg = (pmix_ptl_send_t*)
            pmix_list_remove_first(&peer->send_queue);
    }

    /* if nothing else to do unregister for send event notifications */
    if (NULL == peer->send_msg && peer->send_ev_active) {
        pmix_event_del(&peer->send_event);
        peer->send_ev_active = false;
    }

    /* ensure we post the modified peer object before another thread
     * picks it back up */
    PMIX_POST_OBJECT(peer);
}
Example #8
0
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    char *evar, **uri, *suri;
    char *filename, *nspace=NULL;
    pmix_rank_t rank = PMIX_RANK_WILDCARD;
    char *p, *p2;
    int sd, rc;
    size_t n;
    char myhost[PMIX_MAXHOSTNAMELEN];
    bool system_level = false;
    bool system_level_only = false;
    pid_t pid = 0;

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "ptl:tcp: connecting to server");

    /* see if the connection info is in the info array - if
     * so, then that overrides all other options */


    /* if I am a client, then we need to look for the appropriate
     * connection info in the environment */
    if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
        if (NULL != (evar = getenv("PMIX_SERVER_URI21"))) {
            /* we are talking to a v2.1 server */
            pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "V21 SERVER DETECTED");
            /* must use the v21 bfrops module */
            pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v21");
            if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
                return PMIX_ERR_INIT;
            }
        } else if (NULL != (evar = getenv("PMIX_SERVER_URI2"))) {
            /* we are talking to a v2.0 server */
            pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "V20 SERVER DETECTED");
            /* must use the v20 bfrops module */
            pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v20");
            if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
                return PMIX_ERR_INIT;
            }
        } else {
            /* not us */
            return PMIX_ERR_NOT_SUPPORTED;
        }
        /* the server will be using the same bfrops as us */
        pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops;
        /* mark that we are using the V2 protocol */
        pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;

        /* the URI consists of the following elements:
        *    - server nspace.rank
        *    - ptl rendezvous URI
        */
        uri = pmix_argv_split(evar, ';');
        if (2 != pmix_argv_count(uri)) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* set the server nspace */
        p = uri[0];
        if (NULL == (p2 = strchr(p, '.'))) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }
        *p2 = '\0';
        ++p2;
        nspace = strdup(p);
        rank = strtoull(p2, NULL, 10);

        /* save the URI, but do not overwrite what we may have received from
         * the info-key directives */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            mca_ptl_tcp_component.super.uri = strdup(uri[1]);
        }
        pmix_argv_free(uri);

        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:client attempt connect to %s",
                            mca_ptl_tcp_component.super.uri);

        /* go ahead and try to connect */
        if (PMIX_SUCCESS != (rc = try_connect(&sd))) {
            free(nspace);
            return rc;
        }
        goto complete;

    }

    /* get here if we are a tool - check any provided directives
     * to see where they want us to connect to */
    if (NULL != info) {
        for (n=0; n < ninfo; n++) {
            if (0 == strcmp(info[n].key, PMIX_CONNECT_TO_SYSTEM)) {
                system_level_only = PMIX_INFO_TRUE(&info[n]);
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_SYSTEM_FIRST)) {
                /* try the system-level */
                system_level = PMIX_INFO_TRUE(&info[n]);
            } else if (0 == strcmp(info[n].key, PMIX_SERVER_PIDINFO)) {
                pid = info[n].value.data.pid;
            } else if (0 == strcmp(info[n].key, PMIX_SERVER_URI)) {
                if (NULL == mca_ptl_tcp_component.super.uri) {
                    free(mca_ptl_tcp_component.super.uri);
                }
                mca_ptl_tcp_component.super.uri = strdup(info[n].value.data.string);
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_RETRY_DELAY)) {
                mca_ptl_tcp_component.wait_to_connect = info[n].value.data.uint32;
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_MAX_RETRIES)) {
                mca_ptl_tcp_component.max_retries = info[n].value.data.uint32;
            }
        }
    }
    /* mark that we are using the V2 protocol */
    pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;
    gethostname(myhost, sizeof(myhost));
    /* if we were given a URI via MCA param, then look no further */
    if (NULL != mca_ptl_tcp_component.super.uri) {
        /* if the string starts with "file:", then they are pointing
         * us to a file we need to read to get the URI itself */
        if (0 == strncmp(mca_ptl_tcp_component.super.uri, "file:", 5)) {
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "ptl:tcp:tool getting connection info from %s",
                                mca_ptl_tcp_component.super.uri);
            nspace = NULL;
            rc = parse_uri_file(&mca_ptl_tcp_component.super.uri[5], &suri, &nspace, &rank);
            if (PMIX_SUCCESS != rc) {
                return PMIX_ERR_UNREACH;
            }
            free(mca_ptl_tcp_component.super.uri);
            mca_ptl_tcp_component.super.uri = suri;
        } else {
            /* we need to extract the nspace/rank of the server from the string */
            p = strchr(mca_ptl_tcp_component.super.uri, ';');
            if (NULL == p) {
                return PMIX_ERR_BAD_PARAM;
            }
            *p = '\0';
            p++;
            suri = strdup(p); // save the uri portion
            /* the '.' in the first part of the original string separates
             * nspace from rank */
            p = strchr(mca_ptl_tcp_component.super.uri, '.');
            if (NULL == p) {
                free(suri);
                return PMIX_ERR_BAD_PARAM;
            }
            *p = '\0';
            p++;
            nspace = strdup(mca_ptl_tcp_component.super.uri);
            rank = strtoull(p, NULL, 10);
            /* now update the URI */
            free(mca_ptl_tcp_component.super.uri);
            mca_ptl_tcp_component.super.uri = suri;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool attempt connect using given URI %s",
                            mca_ptl_tcp_component.super.uri);
        /* go ahead and try to connect */
        if (PMIX_SUCCESS != (rc = try_connect(&sd))) {
            if (NULL != nspace) {
                free(nspace);
            }
            return rc;
        }
        goto complete;
    }

    /* if they gave us a pid, then look for it */
    if (0 != pid) {
        if (0 > asprintf(&filename, "pmix.%s.tool.%d", myhost, pid)) {
            return PMIX_ERR_NOMEM;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool searching for given session server %s",
                            filename);
        nspace = NULL;
        rc = df_search(mca_ptl_tcp_component.system_tmpdir,
                       filename, &sd, &nspace, &rank);
        free(filename);
        if (PMIX_SUCCESS == rc) {
            goto complete;
        }
        if (NULL != nspace) {
            free(nspace);
        }
        /* since they gave us a specific pid and we couldn't
         * connect to it, return an error */
        return PMIX_ERR_UNREACH;
    }


    /* if they asked for system-level, we start there */
    if (system_level || system_level_only) {
        if (0 > asprintf(&filename, "%s/pmix.sys.%s", mca_ptl_tcp_component.system_tmpdir, myhost)) {
            return PMIX_ERR_NOMEM;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool looking for system server at %s",
                            filename);
        /* try to read the file */
        rc = parse_uri_file(filename, &suri, &nspace, &rank);
        free(filename);
        if (PMIX_SUCCESS == rc) {
            mca_ptl_tcp_component.super.uri = suri;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "ptl:tcp:tool attempt connect to system server at %s",
                                mca_ptl_tcp_component.super.uri);
            /* go ahead and try to connect */
            if (PMIX_SUCCESS == try_connect(&sd)) {
                goto complete;
            }
            free(nspace);
        }
    }

    /* we get here if they either didn't ask for a system-level connection,
     * or they asked for it and it didn't succeed. If they _only_ wanted
     * a system-level connection, then we are done */
    if (system_level_only) {
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp: connecting to system failed");
        return PMIX_ERR_UNREACH;
    }

    /* they didn't give us a pid, so we will search to see what session-level
     * tools are available to this user. We will take the first connection
     * that succeeds - this is based on the likelihood that there is only
     * one session per user on a node */

    if (0 > asprintf(&filename, "pmix.%s.tool", myhost)) {
        return PMIX_ERR_NOMEM;
    }
    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "ptl:tcp:tool searching for session server %s",
                        filename);
    nspace = NULL;
    rc = df_search(mca_ptl_tcp_component.system_tmpdir,
                   filename, &sd, &nspace, &rank);
    free(filename);
    if (PMIX_SUCCESS != rc) {
        if (NULL != nspace){
            free(nspace);
        }
        return PMIX_ERR_UNREACH;
    }

  complete:
    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* do a final bozo check */
    if (NULL == nspace || PMIX_RANK_WILDCARD == rank) {
        if (NULL != nspace) {
            free(nspace);
        }
        CLOSE_THE_SOCKET(sd);
        return PMIX_ERR_UNREACH;
    }
    /* mark the connection as made */
    pmix_globals.connected = true;
    pmix_client_globals.myserver->sd = sd;

    /* setup the server info */
    if (NULL == pmix_client_globals.myserver->info) {
        pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr) {
        pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr->nspace) {
        pmix_client_globals.myserver->nptr->nspace = nspace;
    } else {
        free(nspace);
    }
    if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_client_globals.myserver->info->pname.nspace = strdup(pmix_client_globals.myserver->nptr->nspace);
    }
    pmix_client_globals.myserver->info->pname.rank = rank;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_ptl_base_recv_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_ptl_base_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}
Example #9
0
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    char *evar, **uri;
    char *filename, *host;
    FILE *fp;
    char *srvr, *p, *p2;
    struct sockaddr_in *in;
    struct sockaddr_in6 *in6;
    pmix_socklen_t len;
    int sd, rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "ptl:tcp: connecting to server");

    /* see if the connection info is in the info array - if
     * so, then that overrides all other options */


    /* if I am a client, then we need to look for the appropriate
     * connection info in the environment */
    if (PMIX_PROC_IS_CLIENT) {
        if (NULL == (evar = getenv("PMIX_SERVER_URI2"))) {
            /* not us */
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* the URI consists of  elements:
        *    - server nspace.rank
        *    - ptl rendezvous URI
        */
        uri = pmix_argv_split(evar, ';');
        if (2 != pmix_argv_count(uri)) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* set the server nspace */
        p = uri[0];
        if (NULL == (p2 = strchr(p, '.'))) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }
        *p2 = '\0';
        ++p2;
        if (NULL == pmix_client_globals.myserver->info) {
            pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
        }
        if (NULL == pmix_client_globals.myserver->nptr) {
            pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
        }
        if (NULL == pmix_client_globals.myserver->nptr->nspace) {
            pmix_client_globals.myserver->nptr->nspace = strdup(p);
        }
        if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
            pmix_client_globals.myserver->info->pname.nspace = strdup(p);
        }

        /* set the server rank */
        pmix_client_globals.myserver->info->pname.rank = strtoull(p2, NULL, 10);

        /* save the URI, but do not overwrite what we may have received from
         * the info-key directives */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            mca_ptl_tcp_component.super.uri = strdup(uri[1]);
        }
        pmix_argv_free(uri);

    } else if (PMIX_PROC_IS_TOOL) {
        /* if we already have a URI, then look no further */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            /* we have to discover the connection info,
             * if possible. Start by looking for the connection
             * info in the expected place - if the server supports
             * tool connections via TCP, then there will be a
             * "contact.txt" file under the system tmpdir */
            filename = pmix_os_path(false, mca_ptl_tcp_component.tmpdir, "pmix-contact.txt", NULL);
            if (NULL == filename) {
                return PMIX_ERR_NOMEM;
            }
            fp = fopen(filename, "r");
            if (NULL == fp) {
                /* if we cannot open the file, then the server must not
                 * be configured to support tool connections - so abort */
                free(filename);
                return PMIX_ERR_UNREACH;
            }
            free(filename);
            /* get the URI */
            srvr = pmix_getline(fp);
            if (NULL == srvr) {
                PMIX_ERROR_LOG(PMIX_ERR_FILE_READ_FAILURE);
                fclose(fp);
                return PMIX_ERR_UNREACH;
            }
            fclose(fp);
            /* up to the first ';' is the server nspace/rank */
            if (NULL == (p = strchr(srvr, ';'))) {
                /* malformed */
                free(srvr);
                return PMIX_ERR_UNREACH;
            }
            *p = '\0';
            ++p;  // move past the semicolon
            /* the nspace is the section up to the '.' */
            if (NULL == (p2 = strchr(srvr, '.'))) {
                /* malformed */
                free(srvr);
                return PMIX_ERR_UNREACH;
            }
            *p2 = '\0';
            ++p2;
            /* set the server nspace */
            if (NULL == pmix_client_globals.myserver->info) {
                pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
            }
            if (NULL == pmix_client_globals.myserver->nptr) {
                pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
            }
            if (NULL == pmix_client_globals.myserver->nptr->nspace) {
                pmix_client_globals.myserver->nptr->nspace = strdup(p);
            }
            if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
                pmix_client_globals.myserver->info->pname.nspace = strdup(p);
            }
            pmix_client_globals.myserver->info->pname.rank = strtoull(p2, NULL, 10);
            /* now parse the uri itself */
            mca_ptl_tcp_component.super.uri = strdup(p);
            free(srvr);
        }
    }

    /* mark that we are the active module for this server */
    pmix_client_globals.myserver->nptr->compat.ptl = &pmix_ptl_tcp_module;

    /* setup the path to the daemon rendezvous point */
    memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage));
    if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) {
        /* separate the IP address from the port */
        p = strdup(mca_ptl_tcp_component.super.uri);
        if (NULL == p) {
            return PMIX_ERR_NOMEM;
        }
        p2 = strchr(&p[7], ':');
        if (NULL == p2) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        ++p2;
        host = &p[7];
        /* load the address */
        in = (struct sockaddr_in*)&mca_ptl_tcp_component.connection;
        in->sin_family = AF_INET;
        in->sin_addr.s_addr = inet_addr(host);
        if (in->sin_addr.s_addr == INADDR_NONE) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        in->sin_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in);
    } else {
        /* separate the IP address from the port */
        p = strdup(mca_ptl_tcp_component.super.uri);
        if (NULL == p) {
            return PMIX_ERR_NOMEM;
        }
        p2 = strchr(&p[7], ':');
        if (NULL == p2) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        if (']' == p[strlen(p)-1]) {
            p[strlen(p)-1] = '\0';
        }
        if ('[' == p[7]) {
            host = &p[8];
        } else {
            host = &p[7];
        }
        /* load the address */
        in6 = (struct sockaddr_in6*)&mca_ptl_tcp_component.connection;
        in6->sin6_family = AF_INET6;
        if (0 == inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) {
            pmix_output (0, "ptl_tcp_parse_uri: Could not convert %s\n", host);
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        in6->sin6_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in6);
    }
    free(p);

    /* establish the connection */
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_tcp_component.connection, len, &sd))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver->sd = sd;

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        PMIX_ERROR_LOG(rc);
        CLOSE_THE_SOCKET(sd);
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        PMIX_ERROR_LOG(rc);
        CLOSE_THE_SOCKET(sd);
        return rc;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_ptl_base_recv_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_ptl_base_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}