Example #1
0
int __recvpath
psmi_mq_handle_rts_outoforder(psm_mq_t mq, uint64_t tag, 
		   uintptr_t send_buf, uint32_t send_msglen, 
		   psm_epaddr_t peer, uint16_t msg_seqnum,
		   mq_rts_callback_fn_t cb, 
		   psm_mq_req_t *req_o)
{
    psm_mq_req_t req;

    PSMI_PLOCK_ASSERT();

    req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
    psmi_assert(req != NULL);

    /* We don't know recv_msglen yet but we set it here for
     * mq_iprobe */
    req->send_msglen = req->recv_msglen = send_msglen;
    req->state = MQ_STATE_UNEXP_RV;
    req->tag = tag;
    req->rts_callback = cb;
    req->recv_msgoff = 0;
    req->send_msgoff = 0;
    req->rts_peer = peer;
    req->rts_sbuf = send_buf;
    req->msg_seqnum = msg_seqnum;
    mq_sq_append(&peer->mctxt_master->outoforder_q, req);
    peer->mctxt_master->outoforder_c++;
    *req_o = req; /* no match, will callback */

    _IPATH_VDBG("from=%s match=%s (req=%p) mqtag=%" PRIx64" recvlen=%d "
		"sendlen=%d errcode=%d\n", psmi_epaddr_get_name(peer->epid), 
		"NO", req, req->tag, 
		req->recv_msglen, req->send_msglen, req->error_code);
    return MQ_RET_UNEXP_OK;
}
Example #2
0
int __recvpath
psmi_mq_handle_data(psm_mq_req_t req, psm_epaddr_t epaddr,
		    uint32_t egrid, uint32_t offset,
		    const void *buf, uint32_t nbytes)
{
    psm_mq_t mq;
    int rc;
    
    if (req == NULL) goto no_req;

    mq = req->mq;
    if (req->state == MQ_STATE_MATCHED)
	rc = MQ_RET_MATCH_OK;
    else {
	psmi_assert(req->state == MQ_STATE_UNEXP);
	rc = MQ_RET_UNEXP_OK;
    }

    psmi_assert(req->egrid.egr_data == egrid);
    psmi_mq_req_copy(req, epaddr, offset, buf, nbytes);

    if (req->send_msgoff == req->send_msglen) {
	if (req->type & MQE_TYPE_EGRLONG) {
	    STAILQ_REMOVE(&epaddr->mctxt_master->egrlong,
				req, psm_mq_req, nextq);
	}
	    
	if (req->state == MQ_STATE_MATCHED) {
	    req->state = MQ_STATE_COMPLETE;
	    mq_qq_append(&mq->completed_q, req);
	}
	else { /* MQ_STATE_UNEXP */
	    req->state = MQ_STATE_COMPLETE;
	}
	_IPATH_VDBG("epaddr=%s completed %d byte send, state=%d\n", 
		    psmi_epaddr_get_name(epaddr->epid),
		    (int)req->send_msglen, req->state);
    }

    return rc;

no_req:
    mq = epaddr->ep->mq;
    req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
    psmi_assert(req != NULL);

    req->egrid.egr_data = egrid;
    req->recv_msgoff = offset;
    req->recv_msglen = nbytes;
    req->buf = psmi_mq_sysbuf_alloc(mq, nbytes);
    psmi_mq_mtucpy(req->buf, buf, nbytes);

    STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrdata, req, nextq);

    return MQ_RET_UNEXP_OK;
}
Example #3
0
/*
 * Add ipsaddr with epid to the epstate table, return new index to caller in
 * 'connidx'.
 */
psm2_error_t
ips_epstate_add(struct ips_epstate *eps, struct ips_epaddr *ipsaddr,
		ips_epstate_idx *connidx_o)
{
	int i, j;
	ips_epstate_idx connidx;

	if (++eps->eps_tabsizeused > eps->eps_tabsize) {	/* realloc */
		struct ips_epstate_entry *newtab;
		eps->eps_tabsize += PTL_EPADDR_ALLOC_CHUNK;
		newtab = (struct ips_epstate_entry *)
		    psmi_calloc(eps->context->ep, PER_PEER_ENDPOINT,
				eps->eps_tabsize,
				sizeof(struct ips_epstate_entry));
		if (newtab == NULL)
			return PSM2_NO_MEMORY;
		else if (eps->eps_tab) {	/* NOT first alloc */
			for (i = 0;
			     i < eps->eps_tabsize - PTL_EPADDR_ALLOC_CHUNK; i++)
				newtab[i] = eps->eps_tab[i];	/* deep copy */
			psmi_free(eps->eps_tab);
		}
		eps->eps_tab = newtab;
	}
	/* Find the next free hole.  We can afford to do this since connect is not
	 * in the critical path */
	for (i = 0, j = eps->eps_tab_nextidx; i < eps->eps_tabsize; i++, j++) {
		if (j == eps->eps_tabsize)
			j = 0;
		if (eps->eps_tab[j].ipsaddr == NULL) {
			eps->eps_tab_nextidx = j + 1;
			if (eps->eps_tab_nextidx == eps->eps_tabsize)
				eps->eps_tab_nextidx = 0;
			break;
		}
	}
	psmi_assert_always(i != eps->eps_tabsize);
	connidx = (j - eps->eps_base_idx) & (IPS_EPSTATE_CONNIDX_MAX-1);
	_HFI_VDBG("node %s gets connidx=%d (table idx %d)\n",
		  psmi_epaddr_get_name(((psm2_epaddr_t) ipsaddr)->epid), connidx,
		  j);
	eps->eps_tab[j].ipsaddr = ipsaddr;
	if (j >= IPS_EPSTATE_CONNIDX_MAX) {
		return psmi_handle_error(eps->context->ep,
					 PSM2_TOO_MANY_ENDPOINTS,
					 "Can't connect to more than %d non-local endpoints",
					 IPS_EPSTATE_CONNIDX_MAX);
	}
	*connidx_o = connidx;
	return PSM2_OK;
}
Example #4
0
int __recvpath
psmi_mq_handle_rts(psm_mq_t mq, uint64_t tag, 
		   uintptr_t send_buf, uint32_t send_msglen, 
		   psm_epaddr_t peer, mq_rts_callback_fn_t cb, 
		   psm_mq_req_t *req_o)
{
    psm_mq_req_t req;
    uint32_t msglen;
    int rc;

    PSMI_PLOCK_ASSERT();

    req = mq_req_match(&(mq->expected_q), tag, 1);

    if (req) { /* we have a match, no need to callback */
	msglen = mq_set_msglen(req, req->buf_len, send_msglen);
	req->type = MQE_TYPE_RECV;
	req->state = MQ_STATE_MATCHED;
	req->tag = tag;
	req->recv_msgoff = 0;
	req->rts_peer = peer;
	req->rts_sbuf = send_buf;
	*req_o = req; /* yes match */
	rc = MQ_RET_MATCH_OK;
    }
    else { /* No match, keep track of callback */
	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
	psmi_assert(req != NULL);

	req->type = MQE_TYPE_RECV;
	/* We don't know recv_msglen yet but we set it here for
	 * mq_iprobe */
	req->send_msglen = req->recv_msglen = send_msglen;
	req->state = MQ_STATE_UNEXP_RV;
	req->tag = tag;
	req->rts_callback = cb;
	req->recv_msgoff = 0;
	req->rts_peer = peer;
	req->rts_sbuf = send_buf;
	mq_sq_append(&mq->unexpected_q, req);
	*req_o = req; /* no match, will callback */
	rc = MQ_RET_UNEXP_OK;
    }

    _IPATH_VDBG("from=%s match=%s (req=%p) mqtag=%" PRIx64" recvlen=%d "
		"sendlen=%d errcode=%d\n", psmi_epaddr_get_name(peer->epid), 
		rc == MQ_RET_MATCH_OK ? "YES" : "NO", req, req->tag, 
		req->recv_msglen, req->send_msglen, req->error_code);
    return rc;
}
Example #5
0
int __recvpath
psmi_mq_handle_data(psm_mq_req_t req, psm_epaddr_t epaddr, 
		    const void *buf, uint32_t nbytes)
{
    psm_mq_t mq = req->mq;
    int rc;
    
    if (req->state == MQ_STATE_MATCHED)
	rc = MQ_RET_MATCH_OK;
    else {
	psmi_assert(req->state == MQ_STATE_UNEXP);
	rc = MQ_RET_UNEXP_OK;
    }

    psmi_mq_req_copy(req, epaddr, buf, nbytes);

    if (req->send_msgoff == req->send_msglen) {
	if (req->type & MQE_TYPE_EGRLONG) {
	    int flowid = req->egrid.egr_flowid;
	    psmi_assert(STAILQ_FIRST(&epaddr->egrlong[flowid]) == req);
	    STAILQ_REMOVE_HEAD(&epaddr->egrlong[flowid], nextq);
	}
	    
	/* Whatever is leftover in the posted message should be now marked as
	 * undefined.
	 * XXX Sends not supported yet.
	 */
#if 0
#ifdef PSM_VALGRIND
	if (req->send_msglen < req->buf_len)
	    VALGRIND_MAKE_MEM_UNDEFINED(
		(void *) ((uintptr_t) req->buf + req->send_msglen), 
		req->buf_len - req->send_msglen);
#endif
#endif
	if (req->state == MQ_STATE_MATCHED) {
	    req->state = MQ_STATE_COMPLETE;
	    mq_qq_append(&mq->completed_q, req);
	}
	else { /* MQ_STATE_UNEXP */
	    req->state = MQ_STATE_COMPLETE;
	}
	_IPATH_VDBG("epaddr=%s completed %d byte send, state=%d\n", 
		    psmi_epaddr_get_name(epaddr->epid),
		    (int)req->send_msglen, req->state);
    }

    return rc;
}
Example #6
0
int __recvpath
psmi_mq_handle_envelope_outoforder(psm_mq_t mq, uint16_t mode,
		   psm_epaddr_t epaddr, uint16_t msg_seqnum,
		   uint64_t tag, psmi_egrid_t egrid, uint32_t send_msglen, 
		   const void *payload, uint32_t paylen)
{
    psm_mq_req_t req;
    uint32_t msglen;

    req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
    psmi_assert(req != NULL);

    req->tag = tag;
    req->recv_msgoff = 0;
    req->recv_msglen = req->send_msglen = req->buf_len = msglen = send_msglen;

    _IPATH_VDBG(
		"from=%s match=NO (req=%p) mode=%x mqtag=%" PRIx64
		" send_msglen=%d\n", psmi_epaddr_get_name(epaddr->epid), 
		req, mode, tag, send_msglen);
    switch (mode) {
	case MQ_MSG_TINY:
	    if (msglen > 0) {
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		mq_copy_tiny((uint32_t *)req->buf, (uint32_t *)payload, msglen);
	    }
	    else
		req->buf = NULL;
	    req->state = MQ_STATE_COMPLETE;
	    break;

	case MQ_MSG_SHORT:
	    req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
	    psmi_mq_mtucpy(req->buf, payload, msglen);
	    req->state = MQ_STATE_COMPLETE;
	    break;

	case MQ_MSG_LONG:
	    req->egrid = egrid;
	    req->epaddr = epaddr;
	    req->send_msgoff = 0;
	    req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
	    req->state = MQ_STATE_UNEXP;
	    req->type |= MQE_TYPE_EGRLONG;
	    STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong, req, nextq);
	    _IPATH_VDBG("unexp MSG_LONG %d of length %d bytes pay=%d\n", 
			egrid.egr_msgno, msglen, paylen);
	    if (paylen > 0)
		psmi_mq_handle_data(req, epaddr,
			egrid.egr_data, 0, payload, paylen);
	    psmi_mq_handle_egrdata(mq, req, epaddr);
	    break;

	default:
	    psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
			    "Internal error, unknown packet 0x%x", mode);
    }

    req->msg_seqnum = msg_seqnum;
    mq_sq_append(&epaddr->mctxt_master->outoforder_q, req);
    epaddr->mctxt_master->outoforder_c++;
    mq->stats.rx_sys_bytes += msglen;
    mq->stats.rx_sys_num++;

    return MQ_RET_UNEXP_OK;
}
Example #7
0
/* 
 * This handles the regular (i.e. non-rendezvous MPI envelopes) 
 */
int __recvpath
psmi_mq_handle_envelope(psm_mq_t mq, uint16_t mode, psm_epaddr_t epaddr,
		   uint64_t tag, psmi_egrid_t egrid, uint32_t send_msglen, 
		   const void *payload, uint32_t paylen)
{
    psm_mq_req_t req;
    uint32_t msglen;
    int rc;

    psmi_assert(epaddr != NULL);

    req = mq_req_match(&(mq->expected_q), tag, 1);

    if (req) { /* we have a match */
	psmi_assert(MQE_TYPE_IS_RECV(req->type));
	req->tag = tag;
	msglen = mq_set_msglen(req, req->buf_len, send_msglen);

	_IPATH_VDBG("from=%s match=YES (req=%p) mode=%x mqtag=%"
		PRIx64" msglen=%d paylen=%d\n", psmi_epaddr_get_name(epaddr->epid), 
		req, mode, tag, msglen, paylen);

	switch(mode) {
	    case MQ_MSG_TINY:
		PSM_VALGRIND_DEFINE_MQ_RECV(req->buf, req->buf_len, msglen);
		mq_copy_tiny((uint32_t *)req->buf, (uint32_t *)payload, msglen);
		req->state = MQ_STATE_COMPLETE;
		mq_qq_append(&mq->completed_q, req);
		break;

	    case MQ_MSG_SHORT: /* message fits in 1 payload */
		PSM_VALGRIND_DEFINE_MQ_RECV(req->buf, req->buf_len, msglen);
		psmi_mq_mtucpy(req->buf, payload, msglen);
		req->state = MQ_STATE_COMPLETE;
		mq_qq_append(&mq->completed_q, req);
		break;

	    case MQ_MSG_LONG:
		req->egrid = egrid;
		req->state = MQ_STATE_MATCHED;
		req->type |= MQE_TYPE_EGRLONG;
		req->send_msgoff = req->recv_msgoff = 0;
		STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong, req, nextq);
		_IPATH_VDBG("exp MSG_LONG %d of length %d bytes pay=%d\n", 
			egrid.egr_msgno, msglen, paylen);
		if (paylen > 0)
		    psmi_mq_handle_data(req, epaddr,
			egrid.egr_data, 0, payload, paylen);
		psmi_mq_handle_egrdata(mq, req, epaddr);
		break;

	    default:
		psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
			    "Internal error, unknown packet 0x%x", mode);
	}

	mq->stats.rx_user_bytes += msglen;
	mq->stats.rx_user_num++;

	rc = MQ_RET_MATCH_OK;
	if (mode == MQ_MSG_LONG)
	    return rc;
    }
    else
	rc =  psmi_mq_handle_envelope_unexpected(mq, mode, epaddr, tag,
		    egrid, send_msglen, payload, paylen);

    return rc;
}
Example #8
0
int __recvpath
psmi_mq_handle_envelope_unexpected(
	psm_mq_t mq, uint16_t mode, psm_epaddr_t epaddr,
	uint64_t tag, psmi_egrid_t egrid, uint32_t send_msglen, 
	const void *payload, uint32_t paylen)
{
    psm_mq_req_t req;
    uint32_t msglen;

    /* 
     * Keep a callback here in case we want to fit some other high-level
     * protocols over MQ (i.e. shmem).  These protocols would bypass the
     * normal mesage handling and go to higher-level message handlers.
     */
    if (mode >= MQ_MSG_USER_FIRST && mq->unexpected_callback) {
	mq->unexpected_callback(mq,mode,epaddr,tag,send_msglen,payload,paylen);
	return MQ_RET_UNEXP_OK;
    }
    req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
    psmi_assert(req != NULL);

    req->tag = tag;
    req->recv_msgoff = 0;
    req->recv_msglen = req->send_msglen = req->buf_len = msglen = send_msglen;

    _IPATH_VDBG(
		"from=%s match=NO (req=%p) mode=%x mqtag=%" PRIx64
		" send_msglen=%d\n", psmi_epaddr_get_name(epaddr->epid), 
		req, mode, tag, send_msglen);
#if 0
    if (mq->cur_sysbuf_bytes+msglen > mq->max_sysbuf_bytes) {
		_IPATH_VDBG("req=%p with len=%d exceeds limit of %llu sysbuf_bytes\n",
			req, msglen, (unsigned long long) mq->max_sysbuf_bytes);
		return MQ_RET_UNEXP_NO_RESOURCES;
    }
#endif
    switch (mode) {
	case MQ_MSG_TINY:
	    if (msglen > 0) {
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		mq_copy_tiny((uint32_t *)req->buf, (uint32_t *)payload, msglen);
	    }
	    else
		req->buf = NULL;
	    req->state = MQ_STATE_COMPLETE;
	    break;

	case MQ_MSG_SHORT:
	    req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
	    psmi_mq_mtucpy(req->buf, payload, msglen);
	    req->state = MQ_STATE_COMPLETE;
	    break;

	case MQ_MSG_LONG:
	    req->egrid = egrid;
	    req->send_msgoff = 0;
	    req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
	    req->state = MQ_STATE_UNEXP;
	    req->type |= MQE_TYPE_EGRLONG;
	    STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong, req, nextq);
	    _IPATH_VDBG("unexp MSG_LONG %d of length %d bytes pay=%d\n", 
			egrid.egr_msgno, msglen, paylen);
	    if (paylen > 0)
		psmi_mq_handle_data(req, epaddr,
			egrid.egr_data, 0, payload, paylen);
	    psmi_mq_handle_egrdata(mq, req, epaddr);
	    break;

	default:
	    psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
			    "Internal error, unknown packet 0x%x", mode);
    }
    mq_sq_append(&mq->unexpected_q, req);
    mq->stats.rx_sys_bytes += msglen;
    mq->stats.rx_sys_num++;

    return MQ_RET_UNEXP_OK;
}
Example #9
0
psm2_error_t
__psm2_ep_connect(psm2_ep_t ep, int num_of_epid, psm2_epid_t const *array_of_epid,
		 int const *array_of_epid_mask,	/* can be NULL */
		 psm2_error_t *array_of_errors, psm2_epaddr_t *array_of_epaddr,
		 int64_t timeout)
{
	psm2_error_t err = PSM2_OK;
	ptl_ctl_t *ptlctl;
	ptl_t *ptl;
	int i, j, dup_idx;
	int num_toconnect = 0;
	int *epid_mask = NULL;
	int *epid_mask_isdupof = NULL;
	char *device;
	uint64_t t_start = get_cycles();
	uint64_t t_left;
	union psmi_envvar_val timeout_intval;

	PSM2_LOG_MSG("entering");
	PSMI_ERR_UNLESS_INITIALIZED(ep);

	PSMI_PLOCK();

	/*
	 * Normally we would lock here, but instead each implemented ptl component
	 * does its own locking.  This is mostly because the ptl components are
	 * ahead of the PSM interface in that they can disconnect their peers.
	 */
	if (ep == NULL || array_of_epaddr == NULL || array_of_epid == NULL ||
	    num_of_epid < 1) {
		err = psmi_handle_error(ep, PSM2_PARAM_ERR,
					"Invalid psm2_ep_connect parameters");
		goto fail;
	}

	/* We need two of these masks to detect duplicates */
	err = PSM2_NO_MEMORY;
	epid_mask =
	    (int *)psmi_malloc(ep, UNDEFINED, sizeof(int) * num_of_epid);
	if (epid_mask == NULL)
		goto fail;
	epid_mask_isdupof =
	    (int *)psmi_malloc(ep, UNDEFINED, sizeof(int) * num_of_epid);
	if (epid_mask_isdupof == NULL)
		goto fail;
	err = PSM2_OK;

	/* Eventually handle timeouts across all connects. */
	for (j = 0; j < num_of_epid; j++) {
		if (array_of_epid_mask != NULL && !array_of_epid_mask[j])
			epid_mask[j] = 0;
		else {
			epid_mask[j] = 1;
			array_of_errors[j] = PSM2_EPID_UNKNOWN;
			array_of_epaddr[j] = NULL;
			num_toconnect++;
		}
		epid_mask_isdupof[j] = -1;
	}

	psmi_getenv("PSM2_CONNECT_TIMEOUT",
		    "End-point connection timeout over-ride. 0 for no time-out.",
		    PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT,
		    (union psmi_envvar_val)0, &timeout_intval);

	if (getenv("PSM2_CONNECT_TIMEOUT")) {
		timeout = timeout_intval.e_uint * SEC_ULL;
	} else if (timeout > 0) {
		/* The timeout parameter provides the minimum timeout. A heuristic
		 * is used to scale up the timeout linearly with the number of
		 * endpoints, and we allow one second per 100 endpoints. */
		timeout = max(timeout, (num_toconnect * SEC_ULL) / 100);
	}

	if (timeout > 0 && timeout < PSMI_MIN_EP_CONNECT_TIMEOUT)
		timeout = PSMI_MIN_EP_CONNECT_TIMEOUT;
	_HFI_PRDBG("Connect to %d endpoints with time-out of %.2f secs\n",
		   num_toconnect, (double)timeout / 1e9);

	/* Look for duplicates in input array */
	for (i = 0; i < num_of_epid; i++) {
		for (j = i + 1; j < num_of_epid; j++) {
			if (array_of_epid[i] == array_of_epid[j] &&
			    epid_mask[i] && epid_mask[j]) {
				epid_mask[j] = 0;	/* don't connect more than once */
				epid_mask_isdupof[j] = i;
			}
		}
	}

	for (i = 0; i < PTL_MAX_INIT; i++) {
		if (ep->devid_enabled[i] == -1)
			continue;
		/* Set up the right connect ptrs */
		switch (ep->devid_enabled[i]) {
		case PTL_DEVID_IPS:
			ptlctl = &ep->ptl_ips;
			ptl = ep->ptl_ips.ptl;
			device = "ips";
			break;
		case PTL_DEVID_AMSH:
			ptlctl = &ep->ptl_amsh;
			ptl = ep->ptl_amsh.ptl;
			device = "amsh";
			break;
		case PTL_DEVID_SELF:
			ptlctl = &ep->ptl_self;
			ptl = ep->ptl_self.ptl;
			device = "self";
			break;
		default:
			device = "unknown";
			ptlctl = &ep->ptl_ips;	/*no-unused */
			ptl = ep->ptl_ips.ptl;	/*no-unused */
			device = "ips";	/*no-unused */
			psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
					  "Unknown/unhandled PTL id %d\n",
					  ep->devid_enabled[i]);
			break;
		}
		t_left = psmi_cycles_left(t_start, timeout);

		_HFI_VDBG("Trying to connect with device %s\n", device);
		if ((err = ptlctl->ep_connect(ptl, num_of_epid, array_of_epid,
					      epid_mask, array_of_errors,
					      array_of_epaddr,
					      cycles_to_nanosecs(t_left)))) {
			_HFI_PRDBG("Connect failure in device %s err=%d\n",
				   device, err);
			goto connect_fail;
		}

		/* Now process what's been connected */
		for (j = 0; j < num_of_epid; j++) {
			dup_idx = epid_mask_isdupof[j];
			if (!epid_mask[j] && dup_idx == -1)
				continue;

			if (dup_idx != -1) {	/* dup */
				array_of_epaddr[j] = array_of_epaddr[dup_idx];
				array_of_errors[j] = array_of_errors[dup_idx];
				epid_mask_isdupof[j] = -1;
			}

			if (array_of_errors[j] == PSM2_OK) {
				epid_mask[j] = 0;	/* don't try on next ptl */
				ep->connections++;
			}
		}
	}

	for (i = 0; i < num_of_epid; i++) {
		ptl_ctl_t *c = NULL;
		if (array_of_epid_mask != NULL && !array_of_epid_mask[i])
			continue;
		/* If we see unreachable here, that means some PTLs were not enabled */
		if (array_of_errors[i] == PSM2_EPID_UNREACHABLE) {
			err = PSM2_EPID_UNREACHABLE;
			break;
		}

		psmi_assert_always(array_of_epaddr[i] != NULL);
		c = array_of_epaddr[i]->ptlctl;
		psmi_assert_always(c != NULL);
		_HFI_VDBG("%-20s DEVICE %s (%p)\n",
			  psmi_epaddr_get_name(array_of_epid[i]),
			  c == &ep->ptl_ips ? "hfi" :
			  (c == &ep->ptl_amsh ? "amsh" : "self"),
			  (void *)array_of_epaddr[i]->ptlctl->ptl);
	}

connect_fail:
	/* If the error is a timeout (at worse) and the client is OPA MPI,
	 * just return timeout to let OPA MPI handle the hostnames that
	 * timed out */
	if (err != PSM2_OK) {
		char errbuf[PSM2_ERRSTRING_MAXLEN];
		size_t len;
		int j = 0;

		if (err == PSM2_EPID_UNREACHABLE) {
			char *deverr = "of an incorrect setting";
			char *eperr = " ";
			char *devname = NULL;
			if (!psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) {
				deverr =
				    "there is no shared memory PSM device (shm)";
				eperr = " shared memory ";
			} else
			    if (!psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
				deverr =
				    "there is no OPA PSM device (hfi)";
				eperr = " OPA ";
			}

			len = snprintf(errbuf, sizeof(errbuf) - 1,
				       "Some%sendpoints could not be connected because %s "
				       "in the currently enabled PSM_DEVICES (",
				       eperr, deverr);
			for (i = 0; i < PTL_MAX_INIT && len < sizeof(errbuf) - 1;
			     i++) {
				switch (ep->devid_enabled[i]) {
				case PTL_DEVID_IPS:
					devname = "hfi";
					break;
				case PTL_DEVID_AMSH:
					devname = "shm";
					break;
				case PTL_DEVID_SELF:
				default:
					devname = "self";
					break;
				}
				len +=
				    snprintf(errbuf + len,
					     sizeof(errbuf) - len - 1, "%s,",
					     devname);
			}
			if (len < sizeof(errbuf) - 1 && devname != NULL)
				/* parsed something, remove trailing comma */
				errbuf[len - 1] = ')';
		} else
			len = snprintf(errbuf, sizeof(errbuf) - 1,
				       "%s", err == PSM2_TIMEOUT ?
				       "Dectected connection timeout" :
				       psm2_error_get_string(err));

		/* first pass, look for all nodes with the error */
		for (i = 0; i < num_of_epid && len < sizeof(errbuf) - 1; i++) {
			if (array_of_epid_mask != NULL
			    && !array_of_epid_mask[i])
				continue;
			if (array_of_errors[i] == PSM2_OK)
				continue;
			if (array_of_errors[i] == PSM2_EPID_UNREACHABLE &&
			    err != PSM2_EPID_UNREACHABLE)
				continue;
			if (err == array_of_errors[i]) {
				len +=
				    snprintf(errbuf + len,
					     sizeof(errbuf) - len - 1, "%c %s",
					     j == 0 ? ':' : ',',
					     psmi_epaddr_get_hostname
					     (array_of_epid[i]));
				j++;
			}
		}
		errbuf[sizeof(errbuf) - 1] = '\0';
		err = psmi_handle_error(ep, err, errbuf);
	}

fail:
	PSMI_PUNLOCK();

	if (epid_mask != NULL)
		psmi_free(epid_mask);
	if (epid_mask_isdupof != NULL)
		psmi_free(epid_mask_isdupof);

	PSM2_LOG_MSG("leaving");
	return err;
}