static void pmix_usock_send(int sd, short args, void *cbdata) { pmix_ptl_queue_t *queue = (pmix_ptl_queue_t*)cbdata; pmix_ptl_send_t *snd; /* acquire the object */ PMIX_ACQUIRE_OBJECT(queue); if (NULL == queue->peer || queue->peer->sd < 0 || NULL == queue->peer->info || NULL == queue->peer->nptr) { /* this peer has lost connection */ PMIX_RELEASE(queue); /* ensure we post the object before another thread * picks it back up */ PMIX_POST_OBJECT(queue); return; } pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "[%s:%d] send to %s:%u on tag %d", __FILE__, __LINE__, (queue->peer)->info->pname.nspace, (queue->peer)->info->pname.rank, (queue->tag)); snd = PMIX_NEW(pmix_ptl_send_t); snd->hdr.pindex = htonl(pmix_globals.pindex); snd->hdr.tag = htonl(queue->tag); snd->hdr.nbytes = htonl((queue->buf)->bytes_used); snd->data = (queue->buf); /* always start with the header */ snd->sdptr = (char*)&snd->hdr; snd->sdbytes = sizeof(pmix_ptl_hdr_t); /* if there is no message on-deck, put this one there */ if (NULL == (queue->peer)->send_msg) { (queue->peer)->send_msg = snd; } else { /* add it to the queue */ pmix_list_append(&(queue->peer)->send_queue, &snd->super); } /* ensure the send event is active */ if (!(queue->peer)->send_ev_active) { (queue->peer)->send_ev_active = true; PMIX_POST_OBJECT(queue->peer); pmix_event_add(&(queue->peer)->send_event, 0); } PMIX_RELEASE(queue); PMIX_POST_OBJECT(snd); }
/*
 * Completion callback for a lookup request.
 *
 * status  - result reported for the lookup
 * pdata   - array of returned entries
 * ndata   - number of entries in pdata
 * cbdata  - the pmix_cb_t tracking the request; its cbdata field holds the
 *           caller's pmix_pdata_t array and nvals its length
 *
 * On success, every returned entry whose key matches one of the caller's
 * entries has its publisher id and value copied into that entry. Returned
 * entries with no matching key are skipped. The tracker's status is set to
 * the lookup status, or to the first value-transfer failure if one occurs,
 * and the waiting thread is woken.
 */
static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t ndata,
                          void *cbdata)
{
    pmix_cb_t *cb = (pmix_cb_t*)cbdata;
    pmix_pdata_t *tgt;
    pmix_status_t rc;
    size_t i, j;

    /* acquire the tracker before reading any of its fields */
    PMIX_ACQUIRE_OBJECT(cb);
    tgt = (pmix_pdata_t*)cb->cbdata;

    cb->status = status;
    if (PMIX_SUCCESS == status) {
        /* match each returned entry against the caller's keys */
        for (i=0; i < ndata; i++) {
            for (j=0; j < cb->nvals; j++) {
                if (0 != strcmp(pdata[i].key, tgt[j].key)) {
                    continue;
                }
                /* transfer the publishing proc id */
                /* NOTE(review): strncpy does not terminate the target when
                 * the source fills PMIX_MAX_NSLEN bytes - presumably the
                 * target array is one byte larger and pre-zeroed; confirm */
                (void)strncpy(tgt[j].proc.nspace, pdata[i].proc.nspace, PMIX_MAX_NSLEN);
                tgt[j].proc.rank = pdata[i].proc.rank;
                /* transfer the value - collect the result in a local so a
                 * later successful transfer cannot hide an earlier failure */
                PMIX_BFROPS_VALUE_XFER(rc, pmix_client_globals.myserver,
                                       &tgt[j].value, &pdata[i].value);
                if (PMIX_SUCCESS != rc && PMIX_SUCCESS == cb->status) {
                    cb->status = rc;
                }
                break;
            }
        }
    }
    PMIX_POST_OBJECT(cb);
    PMIX_WAKEUP_THREAD(&cb->lock);
}
/*
 * Generic completion callback: store the reported status in the tracking
 * object passed as cbdata, post it, and wake the thread waiting on its lock.
 */
static void op_cbfunc(pmix_status_t status, void *cbdata)
{
    pmix_cb_t *tracker = (pmix_cb_t*)cbdata;

    /* record the outcome for the waiter */
    tracker->status = status;

    /* publish the update, then release the waiter */
    PMIX_POST_OBJECT(tracker);
    PMIX_WAKEUP_THREAD(&tracker->lock);
}
/*
 * Connect this client to its server over the usock rendezvous point.
 *
 * The rendezvous information is taken from the environment:
 * PMIX_SERVER_URI2USOCK selects the "v21" bfrops module, otherwise
 * PMIX_SERVER_URI selects the "v12" module. The value must split on ':'
 * into exactly three fields: server nspace, server rank, socket path.
 * After connecting, the connect-ack handshake is performed (retried once
 * if the server reports PMIX_ERR_TEMP_UNAVAILABLE) and the recv/send
 * events are set up on the server peer object.
 *
 * peer, info and ninfo are not used by this implementation.
 *
 * Returns PMIX_SUCCESS on success, or:
 *   PMIX_ERR_NOT_SUPPORTED    - this process is not a client
 *   PMIX_ERR_INIT             - the bfrops module could not be assigned
 *   PMIX_ERR_SERVER_NOT_AVAIL - neither environment variable is set
 *   PMIX_ERROR                - the URI does not have three fields
 *   PMIX_ERR_NOT_FOUND        - the rendezvous file is not readable
 *   otherwise the status returned by the connect or handshake step.
 */
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    struct sockaddr_un *address;
    char *evar, **uri;
    pmix_status_t rc;
    int sd;
    pmix_socklen_t len;
    bool retried = false;

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "[%s:%d] connect to server",
                        __FILE__, __LINE__);

    /* if we are not a client, there is nothing we can do */
    if (!PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* if we don't have a path to the daemon rendezvous point,
     * then we need to return an error */
    if (NULL != (evar = getenv("PMIX_SERVER_URI2USOCK"))) {
        /* this is a v2.1+ server */
        pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v21");
        if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
            return PMIX_ERR_INIT;
        }
    } else if (NULL != (evar = getenv("PMIX_SERVER_URI"))) {
        /* this is a pre-v2.1 server - must use the v12 bfrops module */
        pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v12");
        if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
            return PMIX_ERR_INIT;
        }
    } else {
        /* let the caller know that the server isn't available */
        return PMIX_ERR_SERVER_NOT_AVAIL;
    }
    /* the server will be using the same bfrops as us */
    /* NOTE(review): myserver->nptr is dereferenced here but only checked
     * for NULL further down - confirm it is always set by this point */
    pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops;
    /* mark that we are using the V1 protocol */
    pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V1;

    uri = pmix_argv_split(evar, ':');
    if (3 != pmix_argv_count(uri)) {
        pmix_argv_free(uri);
        PMIX_ERROR_LOG(PMIX_ERROR);
        return PMIX_ERROR;
    }

    /* set the server nspace */
    if (NULL == pmix_client_globals.myserver->info) {
        pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr) {
        pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr->nspace) {
        pmix_client_globals.myserver->nptr->nspace = strdup(uri[0]);
    }
    if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_client_globals.myserver->info->pname.nspace = strdup(uri[0]);
    }

    /* set the server rank */
    pmix_client_globals.myserver->info->pname.rank = strtoull(uri[1], NULL, 10);

    /* setup the path to the daemon rendezvous point */
    memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage));
    address = (struct sockaddr_un*)&mca_ptl_usock_component.connection;
    address->sun_family = AF_UNIX;
    snprintf(address->sun_path, sizeof(address->sun_path)-1, "%s", uri[2]);
    /* if the rendezvous file doesn't exist, that's an error */
    if (0 != access(uri[2], R_OK)) {
        pmix_argv_free(uri);
        PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND);
        return PMIX_ERR_NOT_FOUND;
    }
    pmix_argv_free(uri);

  retry:
    /* establish the connection */
    len = sizeof(struct sockaddr_un);
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_usock_component.connection, len, &sd))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver->sd = sd;

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        /* do not leave a closed descriptor recorded on the server peer */
        pmix_client_globals.myserver->sd = -1;
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        /* do not leave a closed descriptor recorded on the server peer */
        pmix_client_globals.myserver->sd = -1;
        if (PMIX_ERR_TEMP_UNAVAILABLE == rc) {
            /* give it two tries */
            if (!retried) {
                retried = true;
                goto retry;
            }
        }
        return rc;
    }

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event - mark it active and post the peer object
     * before adding the event, and add it exactly once */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_usock_recv_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event - it is only added once there is something to send */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_usock_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}
void pmix_usock_send_recv(int fd, short args, void *cbdata) { pmix_ptl_sr_t *ms = (pmix_ptl_sr_t*)cbdata; pmix_ptl_posted_recv_t *req; pmix_ptl_send_t *snd; uint32_t tag; /* acquire the object */ PMIX_ACQUIRE_OBJECT(ms); if (ms->peer->sd < 0) { /* this peer's socket has been closed */ PMIX_RELEASE(ms); /* ensure we post the object before another thread * picks it back up */ PMIX_POST_OBJECT(NULL); return; } /* take the next tag in the sequence */ pmix_ptl_globals.current_tag++; if (UINT32_MAX == pmix_ptl_globals.current_tag ) { pmix_ptl_globals.current_tag = PMIX_PTL_TAG_DYNAMIC; } tag = pmix_ptl_globals.current_tag; if (NULL != ms->cbfunc) { /* if a callback msg is expected, setup a recv for it */ req = PMIX_NEW(pmix_ptl_posted_recv_t); req->tag = tag; req->cbfunc = ms->cbfunc; req->cbdata = ms->cbdata; pmix_output_verbose(5, pmix_ptl_base_framework.framework_output, "posting recv on tag %d", req->tag); /* add it to the list of recvs - we cannot have unexpected messages * in this subsystem as the server never sends us something that * we didn't previously request */ pmix_list_prepend(&pmix_ptl_globals.posted_recvs, &req->super); } snd = PMIX_NEW(pmix_ptl_send_t); snd->hdr.pindex = pmix_globals.pindex; snd->hdr.tag = tag; snd->hdr.nbytes = ms->bfr->bytes_used; snd->data = ms->bfr; /* always start with the header */ snd->sdptr = (char*)&snd->hdr; snd->sdbytes = sizeof(pmix_usock_hdr_t); /* if there is no message on-deck, put this one there */ if (NULL == ms->peer->send_msg) { ms->peer->send_msg = snd; } else { /* add it to the queue */ pmix_list_append(&ms->peer->send_queue, &snd->super); } /* ensure the send event is active */ if (!ms->peer->send_ev_active) { ms->peer->send_ev_active = true; PMIX_POST_OBJECT(snd); pmix_event_add(&ms->peer->send_event, 0); } /* cleanup */ PMIX_RELEASE(ms); PMIX_POST_OBJECT(snd); }
void pmix_usock_recv_handler(int sd, short flags, void *cbdata) { pmix_status_t rc; pmix_peer_t *peer = (pmix_peer_t*)cbdata; pmix_ptl_recv_t *msg = NULL; /* acquire the object */ PMIX_ACQUIRE_OBJECT(peer); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:recv:handler called with peer %s:%d", (NULL == peer) ? "NULL" : peer->info->pname.nspace, (NULL == peer) ? PMIX_RANK_UNDEF : peer->info->pname.rank); if (NULL == peer) { return; } /* allocate a new message and setup for recv */ if (NULL == peer->recv_msg) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:recv:handler allocate new recv msg"); peer->recv_msg = PMIX_NEW(pmix_ptl_recv_t); if (NULL == peer->recv_msg) { pmix_output(0, "usock_recv_handler: unable to allocate recv message\n"); goto err_close; } PMIX_RETAIN(peer); peer->recv_msg->peer = peer; // provide a handle back to the peer object /* start by reading the header */ peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr; peer->recv_msg->rdbytes = sizeof(pmix_usock_hdr_t); } msg = peer->recv_msg; msg->sd = sd; /* if the header hasn't been completely read, read it */ if (!msg->hdr_recvd) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:recv:handler read hdr on socket %d", peer->sd); if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) { /* completed reading the header */ peer->recv_msg->hdr_recvd = true; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "RECVD MSG FOR TAG %d SIZE %d", (int)peer->recv_msg->hdr.tag, (int)peer->recv_msg->hdr.nbytes); /* if this is a zero-byte message, then we are done */ if (0 == peer->recv_msg->hdr.nbytes) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "RECVD ZERO-BYTE MESSAGE FROM %s:%d for tag %d", peer->info->pname.nspace, peer->info->pname.rank, peer->recv_msg->hdr.tag); peer->recv_msg->data = NULL; // make sure peer->recv_msg->rdptr = NULL; peer->recv_msg->rdbytes = 0; /* post it for delivery 
*/ PMIX_ACTIVATE_POST_MSG(peer->recv_msg); peer->recv_msg = NULL; PMIX_POST_OBJECT(peer); return; } else { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:recv:handler allocate data region of size %lu", (unsigned long)peer->recv_msg->hdr.nbytes); /* allocate the data region */ peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes); memset(peer->recv_msg->data, 0, peer->recv_msg->hdr.nbytes); /* point to it */ peer->recv_msg->rdptr = peer->recv_msg->data; peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes; } /* fall thru and attempt to read the data */ } else if (PMIX_ERR_RESOURCE_BUSY == rc || PMIX_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ return; } else { /* the remote peer closed the connection - report that condition * and let the caller know */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix_usock_msg_recv: peer closed connection"); goto err_close; } } if (peer->recv_msg->hdr_recvd) { /* continue to read the data block - we start from * wherever we left off, which could be at the * beginning or somewhere in the message */ if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) { /* we recvd all of the message */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", (int)peer->recv_msg->hdr.nbytes, peer->recv_msg->hdr.tag, peer->sd); /* post it for delivery */ PMIX_ACTIVATE_POST_MSG(peer->recv_msg); peer->recv_msg = NULL; /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); return; } else if (PMIX_ERR_RESOURCE_BUSY == rc || PMIX_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); return; } else { /* the remote peer closed the connection - report that condition * and 
let the caller know */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix_usock_msg_recv: peer closed connection"); goto err_close; } } /* success */ return; err_close: /* stop all events */ if (peer->recv_ev_active) { pmix_event_del(&peer->recv_event); peer->recv_ev_active = false; } if (peer->send_ev_active) { pmix_event_del(&peer->send_event); peer->send_ev_active = false; } if (NULL != peer->recv_msg) { PMIX_RELEASE(peer->recv_msg); peer->recv_msg = NULL; } pmix_ptl_base_lost_connection(peer, PMIX_ERR_UNREACH); /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); }
/* * A file descriptor is available/ready for send. Check the state * of the socket and take the appropriate action. */ void pmix_usock_send_handler(int sd, short flags, void *cbdata) { pmix_peer_t *peer = (pmix_peer_t*)cbdata; pmix_ptl_send_t *msg = peer->send_msg; pmix_status_t rc; uint32_t nbytes; /* acquire the object */ PMIX_ACQUIRE_OBJECT(peer); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "%s:%d usock:send_handler SENDING TO PEER %s:%d tag %u with %s msg", pmix_globals.myid.nspace, pmix_globals.myid.rank, peer->info->pname.nspace, peer->info->pname.rank, (NULL == msg) ? UINT_MAX : msg->hdr.tag, (NULL == msg) ? "NULL" : "NON-NULL"); if (NULL != msg) { if (!msg->hdr_sent) { if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { /* we have to convert the header back to host-byte order */ msg->hdr.pindex = ntohl(msg->hdr.pindex); msg->hdr.tag = ntohl(msg->hdr.tag); nbytes = msg->hdr.nbytes; msg->hdr.nbytes = ntohl(nbytes); } pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:send_handler SENDING HEADER WITH MSG IDX %d TAG %d SIZE %lu", msg->hdr.pindex, msg->hdr.tag, (unsigned long)msg->hdr.nbytes); if (PMIX_SUCCESS == (rc = send_bytes(peer->sd, &msg->sdptr, &msg->sdbytes))) { /* header is completely sent */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:send_handler HEADER SENT"); msg->hdr_sent = true; /* setup to send the data */ if (NULL == msg->data) { /* this was a zero-byte msg - nothing more to do */ PMIX_RELEASE(msg); peer->send_msg = NULL; goto next; } else { /* send the data as a single block */ msg->sdptr = msg->data->base_ptr; msg->sdbytes = msg->hdr.nbytes; } /* fall thru and let the send progress */ } else if (PMIX_ERR_RESOURCE_BUSY == rc || PMIX_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:send_handler RES BUSY OR WOULD BLOCK"); if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { /* have to 
convert back again so we are correct when we re-enter */ msg->hdr.pindex = htonl(msg->hdr.pindex); msg->hdr.tag = htonl(msg->hdr.tag); nbytes = msg->hdr.nbytes; msg->hdr.nbytes = htonl(nbytes); } /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); return; } else { // report the error event_del(&peer->send_event); peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; pmix_ptl_base_lost_connection(peer, rc); /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); return; } } if (msg->hdr_sent) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:send_handler SENDING BODY OF MSG"); if (PMIX_SUCCESS == (rc = send_bytes(peer->sd, &msg->sdptr, &msg->sdbytes))) { // message is complete pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:send_handler BODY SENT"); PMIX_RELEASE(msg); peer->send_msg = NULL; } else if (PMIX_ERR_RESOURCE_BUSY == rc || PMIX_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "usock:send_handler RES BUSY OR WOULD BLOCK"); /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); return; } else { // report the error pmix_output(0, "pmix_usock_peer_send_handler: unable to send message ON SOCKET %d", peer->sd); pmix_event_del(&peer->send_event); peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; pmix_ptl_base_lost_connection(peer, rc); /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); return; } } next: /* if current message completed - progress any pending sends by * moving the next in the queue into the "on-deck" position. Note * that this doesn't mean we send the message right now - we will * wait for another send_event to fire before doing so. 
This gives * us a chance to service any pending recvs. */ peer->send_msg = (pmix_ptl_send_t*) pmix_list_remove_first(&peer->send_queue); } /* if nothing else to do unregister for send event notifications */ if (NULL == peer->send_msg && peer->send_ev_active) { pmix_event_del(&peer->send_event); peer->send_ev_active = false; } /* ensure we post the modified peer object before another thread * picks it back up */ PMIX_POST_OBJECT(peer); }
/*
 * Connect this process to a server over the tcp PTL.
 *
 * Clients take the rendezvous information from the environment:
 * PMIX_SERVER_URI21 (v2.1 server, "v21" bfrops) or PMIX_SERVER_URI2
 * (v2.0 server, "v20" bfrops). The value has the form
 * "<nspace>.<rank>;<uri>".
 *
 * Any other process is treated as a tool and picks its target in this
 * order:
 *   1. a URI already held by the component, or given through the
 *      PMIX_SERVER_URI directive ("file:<path>" means read it from a file);
 *   2. the server identified by the PMIX_SERVER_PIDINFO directive;
 *   3. the system-level server, when PMIX_CONNECT_TO_SYSTEM or
 *      PMIX_CONNECT_SYSTEM_FIRST is set;
 *   4. the first session-level server found under the system tmpdir.
 *
 * info/ninfo carry the tool directives; peer is not used.
 *
 * On success the server peer object is filled in (socket, nspace, rank),
 * the recv event is added, the send event is prepared, and PMIX_SUCCESS is
 * returned. Failures return an error status from the step that failed.
 */
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    char *evar, **uri, *suri;
    char *filename, *nspace=NULL;
    pmix_rank_t rank = PMIX_RANK_WILDCARD;
    char *p, *p2;
    int sd, rc;
    size_t n;
    char myhost[PMIX_MAXHOSTNAMELEN] = {0};
    bool system_level = false;
    bool system_level_only = false;
    pid_t pid = 0;

    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "ptl:tcp: connecting to server");

    /* if I am a client, then we need to look for the appropriate
     * connection info in the environment */
    if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) {
        if (NULL != (evar = getenv("PMIX_SERVER_URI21"))) {
            /* we are talking to a v2.1 server */
            pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "V21 SERVER DETECTED");
            /* must use the v21 bfrops module */
            pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v21");
            if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
                return PMIX_ERR_INIT;
            }
        } else if (NULL != (evar = getenv("PMIX_SERVER_URI2"))) {
            /* we are talking to a v2.0 server */
            pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "V20 SERVER DETECTED");
            /* must use the v20 bfrops module */
            pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module("v20");
            if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) {
                return PMIX_ERR_INIT;
            }
        } else {
            /* not us */
            return PMIX_ERR_NOT_SUPPORTED;
        }
        /* the server will be using the same bfrops as us */
        /* NOTE(review): myserver->nptr is dereferenced here but only
         * checked for NULL at "complete" - confirm it is always set */
        pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops;
        /* mark that we are using the V2 protocol */
        pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;

        /* the URI consists of the following elements:
         *    - server nspace.rank
         *    - ptl rendezvous URI
         */
        uri = pmix_argv_split(evar, ';');
        if (2 != pmix_argv_count(uri)) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* set the server nspace */
        p = uri[0];
        if (NULL == (p2 = strchr(p, '.'))) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }
        *p2 = '\0';
        ++p2;
        nspace = strdup(p);
        rank = strtoull(p2, NULL, 10);

        /* save the URI, but do not overwrite what we may have received from
         * the info-key directives */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            mca_ptl_tcp_component.super.uri = strdup(uri[1]);
        }
        pmix_argv_free(uri);

        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:client attempt connect to %s",
                            mca_ptl_tcp_component.super.uri);

        /* go ahead and try to connect */
        if (PMIX_SUCCESS != (rc = try_connect(&sd))) {
            free(nspace);
            return rc;
        }
        goto complete;
    }

    /* get here if we are a tool - check any provided directives
     * to see where they want us to connect to */
    if (NULL != info) {
        for (n=0; n < ninfo; n++) {
            if (0 == strcmp(info[n].key, PMIX_CONNECT_TO_SYSTEM)) {
                system_level_only = PMIX_INFO_TRUE(&info[n]);
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_SYSTEM_FIRST)) {
                /* try the system-level */
                system_level = PMIX_INFO_TRUE(&info[n]);
            } else if (0 == strcmp(info[n].key, PMIX_SERVER_PIDINFO)) {
                pid = info[n].value.data.pid;
            } else if (0 == strcmp(info[n].key, PMIX_SERVER_URI)) {
                /* the directive replaces any URI we already hold, so
                 * release the old one first (free(NULL) is a no-op) */
                free(mca_ptl_tcp_component.super.uri);
                mca_ptl_tcp_component.super.uri = strdup(info[n].value.data.string);
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_RETRY_DELAY)) {
                mca_ptl_tcp_component.wait_to_connect = info[n].value.data.uint32;
            } else if (0 == strcmp(info[n].key, PMIX_CONNECT_MAX_RETRIES)) {
                mca_ptl_tcp_component.max_retries = info[n].value.data.uint32;
            }
        }
    }

    /* mark that we are using the V2 protocol */
    pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2;
    gethostname(myhost, sizeof(myhost));
    /* gethostname need not terminate a truncated name */
    myhost[sizeof(myhost) - 1] = '\0';

    /* if we were given a URI via MCA param, then look no further */
    if (NULL != mca_ptl_tcp_component.super.uri) {
        /* if the string starts with "file:", then they are pointing
         * us to a file we need to read to get the URI itself */
        if (0 == strncmp(mca_ptl_tcp_component.super.uri, "file:", 5)) {
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "ptl:tcp:tool getting connection info from %s",
                                mca_ptl_tcp_component.super.uri);
            nspace = NULL;
            rc = parse_uri_file(&mca_ptl_tcp_component.super.uri[5], &suri, &nspace, &rank);
            if (PMIX_SUCCESS != rc) {
                return PMIX_ERR_UNREACH;
            }
            free(mca_ptl_tcp_component.super.uri);
            mca_ptl_tcp_component.super.uri = suri;
        } else {
            /* we need to extract the nspace/rank of the server from the string */
            p = strchr(mca_ptl_tcp_component.super.uri, ';');
            if (NULL == p) {
                return PMIX_ERR_BAD_PARAM;
            }
            *p = '\0';
            p++;
            suri = strdup(p);  /* save the uri portion */
            /* the '.' in the first part of the original string separates
             * nspace from rank */
            p = strchr(mca_ptl_tcp_component.super.uri, '.');
            if (NULL == p) {
                free(suri);
                return PMIX_ERR_BAD_PARAM;
            }
            *p = '\0';
            p++;
            nspace = strdup(mca_ptl_tcp_component.super.uri);
            rank = strtoull(p, NULL, 10);
            /* now update the URI */
            free(mca_ptl_tcp_component.super.uri);
            mca_ptl_tcp_component.super.uri = suri;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool attempt connect using given URI %s",
                            mca_ptl_tcp_component.super.uri);
        /* go ahead and try to connect */
        if (PMIX_SUCCESS != (rc = try_connect(&sd))) {
            free(nspace);
            return rc;
        }
        goto complete;
    }

    /* if they gave us a pid, then look for it */
    if (0 != pid) {
        if (0 > asprintf(&filename, "pmix.%s.tool.%d", myhost, pid)) {
            return PMIX_ERR_NOMEM;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool searching for given session server %s",
                            filename);
        nspace = NULL;
        rc = df_search(mca_ptl_tcp_component.system_tmpdir,
                       filename, &sd, &nspace, &rank);
        free(filename);
        if (PMIX_SUCCESS == rc) {
            goto complete;
        }
        free(nspace);
        /* since they gave us a specific pid and we couldn't
         * connect to it, return an error */
        return PMIX_ERR_UNREACH;
    }

    /* if they asked for system-level, we start there */
    if (system_level || system_level_only) {
        if (0 > asprintf(&filename, "%s/pmix.sys.%s", mca_ptl_tcp_component.system_tmpdir, myhost)) {
            return PMIX_ERR_NOMEM;
        }
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp:tool looking for system server at %s",
                            filename);
        /* try to read the file */
        rc = parse_uri_file(filename, &suri, &nspace, &rank);
        free(filename);
        if (PMIX_SUCCESS == rc) {
            mca_ptl_tcp_component.super.uri = suri;
            pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                                "ptl:tcp:tool attempt connect to system server at %s",
                                mca_ptl_tcp_component.super.uri);
            /* go ahead and try to connect */
            if (PMIX_SUCCESS == try_connect(&sd)) {
                goto complete;
            }
            free(nspace);
            /* do not keep a pointer to the released string */
            nspace = NULL;
        }
    }

    /* we get here if they either didn't ask for a system-level connection,
     * or they asked for it and it didn't succeed. If they _only_ wanted
     * a system-level connection, then we are done */
    if (system_level_only) {
        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                            "ptl:tcp: connecting to system failed");
        return PMIX_ERR_UNREACH;
    }

    /* they didn't give us a pid, so we will search to see what session-level
     * tools are available to this user. We will take the first connection
     * that succeeds - this is based on the likelihood that there is only
     * one session per user on a node */

    if (0 > asprintf(&filename, "pmix.%s.tool", myhost)) {
        return PMIX_ERR_NOMEM;
    }
    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "ptl:tcp:tool searching for session server %s",
                        filename);
    nspace = NULL;
    rc = df_search(mca_ptl_tcp_component.system_tmpdir,
                   filename, &sd, &nspace, &rank);
    free(filename);
    if (PMIX_SUCCESS != rc) {
        free(nspace);
        return PMIX_ERR_UNREACH;
    }

  complete:
    pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* do a final bozo check */
    if (NULL == nspace || PMIX_RANK_WILDCARD == rank) {
        free(nspace);
        CLOSE_THE_SOCKET(sd);
        return PMIX_ERR_UNREACH;
    }
    /* mark the connection as made */
    pmix_globals.connected = true;
    pmix_client_globals.myserver->sd = sd;

    /* setup the server info - nspace is either handed over to the
     * server's namespace record or released here */
    if (NULL == pmix_client_globals.myserver->info) {
        pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr) {
        pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
    }
    if (NULL == pmix_client_globals.myserver->nptr->nspace) {
        pmix_client_globals.myserver->nptr->nspace = nspace;
    } else {
        free(nspace);
    }
    if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
        pmix_client_globals.myserver->info->pname.nspace = strdup(pmix_client_globals.myserver->nptr->nspace);
    }
    pmix_client_globals.myserver->info->pname.rank = rank;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_ptl_base_recv_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event - it is only added once there is something to send */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_ptl_base_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}
/*
 * Connect this process to its server over TCP.
 *
 * A client takes "<nspace>.<rank>;<uri>" from the PMIX_SERVER_URI2
 * environment variable. A tool that has no URI yet reads the same string
 * from the "pmix-contact.txt" file under the component's tmpdir. The URI
 * is then parsed as "tcp4://<addr>:<port>" or, for anything not starting
 * with "tcp4", as an IPv6 URI whose address may be wrapped in brackets.
 * After connecting, the connect-ack handshake is performed and the
 * recv/send events are set up on the server peer object.
 *
 * peer, info and ninfo are not used by this implementation.
 *
 * Returns PMIX_SUCCESS on success, or:
 *   PMIX_ERR_NOT_SUPPORTED - client without PMIX_SERVER_URI2, or a
 *                            malformed value in it
 *   PMIX_ERR_UNREACH       - contact file missing/unreadable/malformed, or
 *                            no URI is available at all
 *   PMIX_ERR_NOMEM         - allocation failure
 *   PMIX_ERR_BAD_PARAM     - the URI cannot be parsed into address and port
 *   otherwise the status returned by the connect or handshake step.
 */
static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
                                     pmix_info_t *info, size_t ninfo)
{
    char *evar, **uri;
    char *filename, *host;
    FILE *fp;
    char *srvr, *p, *p2;
    struct sockaddr_in *in;
    struct sockaddr_in6 *in6;
    pmix_socklen_t len;
    int sd, rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "ptl:tcp: connecting to server");

    /* if I am a client, then we need to look for the appropriate
     * connection info in the environment */
    if (PMIX_PROC_IS_CLIENT) {
        if (NULL == (evar = getenv("PMIX_SERVER_URI2"))) {
            /* not us */
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* the URI consists of elements:
         *    - server nspace.rank
         *    - ptl rendezvous URI
         */
        uri = pmix_argv_split(evar, ';');
        if (2 != pmix_argv_count(uri)) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }

        /* set the server nspace */
        p = uri[0];
        if (NULL == (p2 = strchr(p, '.'))) {
            PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
            pmix_argv_free(uri);
            return PMIX_ERR_NOT_SUPPORTED;
        }
        *p2 = '\0';
        ++p2;
        if (NULL == pmix_client_globals.myserver->info) {
            pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
        }
        if (NULL == pmix_client_globals.myserver->nptr) {
            pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
        }
        if (NULL == pmix_client_globals.myserver->nptr->nspace) {
            pmix_client_globals.myserver->nptr->nspace = strdup(p);
        }
        if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
            pmix_client_globals.myserver->info->pname.nspace = strdup(p);
        }

        /* set the server rank */
        pmix_client_globals.myserver->info->pname.rank = strtoull(p2, NULL, 10);

        /* save the URI, but do not overwrite what we may have received from
         * the info-key directives */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            mca_ptl_tcp_component.super.uri = strdup(uri[1]);
        }
        pmix_argv_free(uri);

    } else if (PMIX_PROC_IS_TOOL) {
        /* if we already have a URI, then look no further */
        if (NULL == mca_ptl_tcp_component.super.uri) {
            /* we have to discover the connection info,
             * if possible. Start by looking for the connection
             * info in the expected place - if the server supports
             * tool connections via TCP, then there will be a
             * "pmix-contact.txt" file under the component's tmpdir */
            filename = pmix_os_path(false, mca_ptl_tcp_component.tmpdir, "pmix-contact.txt", NULL);
            if (NULL == filename) {
                return PMIX_ERR_NOMEM;
            }
            fp = fopen(filename, "r");
            if (NULL == fp) {
                /* if we cannot open the file, then the server must not
                 * be configured to support tool connections - so abort */
                free(filename);
                return PMIX_ERR_UNREACH;
            }
            free(filename);
            /* get the URI */
            srvr = pmix_getline(fp);
            if (NULL == srvr) {
                PMIX_ERROR_LOG(PMIX_ERR_FILE_READ_FAILURE);
                fclose(fp);
                return PMIX_ERR_UNREACH;
            }
            fclose(fp);
            /* up to the first ';' is the server nspace/rank */
            if (NULL == (p = strchr(srvr, ';'))) {
                /* malformed */
                free(srvr);
                return PMIX_ERR_UNREACH;
            }
            *p = '\0';
            ++p;  /* move past the semicolon - p now points at the URI */
            /* the nspace is the section up to the '.' */
            if (NULL == (p2 = strchr(srvr, '.'))) {
                /* malformed */
                free(srvr);
                return PMIX_ERR_UNREACH;
            }
            *p2 = '\0';
            ++p2;
            /* set the server nspace - after the two terminators written
             * above, srvr holds just the nspace while p holds the URI */
            if (NULL == pmix_client_globals.myserver->info) {
                pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
            }
            if (NULL == pmix_client_globals.myserver->nptr) {
                pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
            }
            if (NULL == pmix_client_globals.myserver->nptr->nspace) {
                pmix_client_globals.myserver->nptr->nspace = strdup(srvr);
            }
            if (NULL == pmix_client_globals.myserver->info->pname.nspace) {
                pmix_client_globals.myserver->info->pname.nspace = strdup(srvr);
            }
            pmix_client_globals.myserver->info->pname.rank = strtoull(p2, NULL, 10);
            /* now save the uri itself */
            mca_ptl_tcp_component.super.uri = strdup(p);
            free(srvr);
        }
    }

    /* mark that we are the active module for this server */
    pmix_client_globals.myserver->nptr->compat.ptl = &pmix_ptl_tcp_module;

    /* setup the path to the daemon rendezvous point */
    memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage));

    /* without a URI there is nobody to connect to */
    if (NULL == mca_ptl_tcp_component.super.uri) {
        return PMIX_ERR_UNREACH;
    }
    /* the parsing below indexes past the 7-character "tcpN://" prefix,
     * so anything that short cannot hold an address */
    if (strlen(mca_ptl_tcp_component.super.uri) <= 7) {
        return PMIX_ERR_BAD_PARAM;
    }

    if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) {
        /* separate the IP address from the port */
        p = strdup(mca_ptl_tcp_component.super.uri);
        if (NULL == p) {
            return PMIX_ERR_NOMEM;
        }
        p2 = strchr(&p[7], ':');
        if (NULL == p2) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        ++p2;
        host = &p[7];
        /* load the address */
        in = (struct sockaddr_in*)&mca_ptl_tcp_component.connection;
        in->sin_family = AF_INET;
        in->sin_addr.s_addr = inet_addr(host);
        if (in->sin_addr.s_addr == INADDR_NONE) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        in->sin_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in);
    } else {
        /* separate the IP address from the port */
        p = strdup(mca_ptl_tcp_component.super.uri);
        if (NULL == p) {
            return PMIX_ERR_NOMEM;
        }
        /* an IPv6 address contains ':' itself, so the port separator
         * is the LAST colon in the string */
        p2 = strrchr(&p[7], ':');
        if (NULL == p2) {
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        *p2 = '\0';
        ++p2;  /* step onto the port digits */
        /* strip the optional brackets around the address */
        if (']' == p[strlen(p)-1]) {
            p[strlen(p)-1] = '\0';
        }
        if ('[' == p[7]) {
            host = &p[8];
        } else {
            host = &p[7];
        }
        /* load the address */
        in6 = (struct sockaddr_in6*)&mca_ptl_tcp_component.connection;
        in6->sin6_family = AF_INET6;
        /* inet_pton returns 1 only when the address was converted */
        if (1 != inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) {
            pmix_output (0, "ptl_tcp_parse_uri: Could not convert %s\n", host);
            free(p);
            return PMIX_ERR_BAD_PARAM;
        }
        in6->sin6_port = htons(atoi(p2));
        len = sizeof(struct sockaddr_in6);
    }
    free(p);

    /* establish the connection */
    if (PMIX_SUCCESS != (rc = pmix_ptl_base_connect(&mca_ptl_tcp_component.connection, len, &sd))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver->sd = sd;

    /* send our identity and any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        PMIX_ERROR_LOG(rc);
        CLOSE_THE_SOCKET(sd);
        /* do not leave a closed descriptor recorded on the server peer */
        pmix_client_globals.myserver->sd = -1;
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        PMIX_ERROR_LOG(rc);
        CLOSE_THE_SOCKET(sd);
        /* do not leave a closed descriptor recorded on the server peer */
        pmix_client_globals.myserver->sd = -1;
        return rc;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_ptl_base_set_nonblocking(sd);

    /* setup recv event */
    pmix_event_assign(&pmix_client_globals.myserver->recv_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_READ | EV_PERSIST,
                      pmix_ptl_base_recv_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->recv_ev_active = true;
    PMIX_POST_OBJECT(pmix_client_globals.myserver);
    pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);

    /* setup send event - it is only added once there is something to send */
    pmix_event_assign(&pmix_client_globals.myserver->send_event,
                      pmix_globals.evbase,
                      pmix_client_globals.myserver->sd,
                      EV_WRITE|EV_PERSIST,
                      pmix_ptl_base_send_handler, pmix_client_globals.myserver);
    pmix_client_globals.myserver->send_ev_active = false;

    return PMIX_SUCCESS;
}