示例#1
0
文件: mtl_psm2.c 项目: 00datman/ompi
int
ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) {
    psm2_error_t err;

    opal_progress_unregister(ompi_mtl_psm2_progress);

    /* free resources */
    err = psm2_mq_finalize(ompi_mtl_psm2.mq);
    if (err) {
        opal_output(0, "Error in psm2_mq_finalize (error %s)\n",
		    psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm2_ep_close(ompi_mtl_psm2.ep, PSM2_EP_CLOSE_GRACEFUL, 1*1e9);
    if (err) {
        opal_output(0, "Error in psm2_ep_close (error %s)\n",
		    psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm2_finalize();
    if (err) {
        opal_output(0, "Error in psm2_finalize (error %s)\n",
		    psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
示例#2
0
文件: mtl_psm2.c 项目: 00datman/ompi
static
psm2_error_t
ompi_mtl_psm2_errhandler(psm2_ep_t ep, const psm2_error_t error,
			const char *error_string, psm2_error_token_t token)
{
    switch (error) {
	/* We don't want PSM2 to default to exiting when the following errors occur */
	case PSM2_EP_DEVICE_FAILURE:
	case PSM2_EP_NO_DEVICE:
	case PSM2_EP_NO_PORTS_AVAIL:
	case PSM2_EP_NO_NETWORK:
	case PSM2_EP_INVALID_UUID_KEY:
	  opal_show_help("help-mtl-psm2.txt",
			 "unable to open endpoint", true,
			 psm2_error_get_string(error));
	    break;

	/* We can't handle any other errors than the ones above */
	default:
	    opal_output(0, "Open MPI detected an unexpected PSM2 error in opening "
			"an endpoint: %s\n", error_string);
	    return psm2_error_defer(token);
	    break;
    }
    return error;
}
示例#3
0
文件: mtl_psm2.c 项目: 00datman/ompi
static
const char *
ompi_mtl_psm2_connect_error_msg(psm2_error_t err)
{
    switch (err) { /* See if we expect the error */
	case PSM2_EPID_UNREACHABLE:
	case PSM2_EPID_INVALID_NODE:
	case PSM2_EPID_INVALID_MTU:
	case PSM2_EPID_INVALID_UUID_KEY:
	case PSM2_EPID_INVALID_VERSION:
	case PSM2_EPID_INVALID_CONNECT:
	    return psm2_error_get_string(err);
	    break;
	case PSM2_EPID_UNKNOWN:
	    return "Connect status could not be determined "
		   "because of other errors";
	default:
	    return NULL;
    }
}
示例#4
0
文件: mtl_psm2.c 项目: 00datman/ompi
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
    psm2_error_t err;
    psm2_ep_t	ep; /* endpoint handle */
    psm2_mq_t	mq;
    psm2_epid_t	epid; /* unique lid+port identifier */
    psm2_uuid_t  unique_job_key;
    struct psm2_ep_open_opts ep_opt;
    unsigned long long *uu = (unsigned long long *) unique_job_key;
    char *generated_key;
    char env_string[256];
    int rc;

    generated_key = getenv("OMPI_MCA_orte_precondition_transports");
    memset(uu, 0, sizeof(psm2_uuid_t));

    if (!generated_key || (strlen(generated_key) != 33) ||
        sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
    {
      opal_show_help("help-mtl-psm2.txt",
		     "no uuid present", true,
		     generated_key ? "could not be parsed from" :
		     "not present in", ompi_process_info.nodename);
      return OMPI_ERROR;

    }

    /* Handle our own errors for opening endpoints */
    psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);

    /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware
     * contexts correctly.
     */
    snprintf(env_string, sizeof(env_string), "%d", local_rank);
    setenv("MPI_LOCALRANKID", env_string, 0);
    snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
    setenv("MPI_LOCALNRANKS", env_string, 0);

    /* Setup the endpoint options. */
    psm2_ep_open_opts_get_defaults(&ep_opt);
    ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
    ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */

    /* Open PSM2 endpoint */
    err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid);
    if (err) {
      opal_show_help("help-mtl-psm2.txt",
		     "unable to open endpoint", true,
		     psm2_error_get_string(err));
      return OMPI_ERROR;
    }

    /* Future errors are handled by the default error handler */
    psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);

    err = psm2_mq_init(ep,
		      0xffff000000000000ULL,
		      NULL,
		      0,
		      &mq);
    if (err) {
      opal_show_help("help-mtl-psm2.txt",
		     "psm2 init", true,
		     psm2_error_get_string(err));
      return OMPI_ERROR;
    }

    ompi_mtl_psm2.ep   = ep;
    ompi_mtl_psm2.epid = epid;
    ompi_mtl_psm2.mq   = mq;

    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_mtl_psm2_component.super.mtl_version,
                    &ompi_mtl_psm2.epid,
                    sizeof(psm2_epid_t));

    if (OMPI_SUCCESS != rc) {
	opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
	return OMPI_ERROR;
    }


    /* register the psm2 progress function */
    opal_progress_register(ompi_mtl_psm2_progress);

    return OMPI_SUCCESS;
}
示例#5
0
文件: mtl_psm2.c 项目: 00datman/ompi
int ompi_mtl_psm2_progress( void ) {
    psm2_error_t err;
    mca_mtl_psm2_request_t* mtl_psm2_request;
    psm2_mq_status2_t psm2_status;
    psm2_mq_req_t req;
    int completed = 1;

    do {
        err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL);
	if (err == PSM2_MQ_INCOMPLETE) {
	    return completed;
	} else if (err != PSM2_OK) {
	    goto error;
	}

	completed++;

	err = psm2_mq_test2(&req, &psm2_status);
	if (err != PSM2_OK) {
	    goto error;
	}

        mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context;

	if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {

        mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
            psm2_status.msg_tag.tag1;
	    mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
		    psm2_status.msg_tag.tag0;
            mtl_psm2_request->super.ompi_req->req_status._ucount =
                psm2_status.nbytes;

            ompi_mtl_datatype_unpack(mtl_psm2_request->convertor,
                                     mtl_psm2_request->buf,
                                     psm2_status.msg_length);
	}

	if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
	  if (mtl_psm2_request->free_after) {
	    free(mtl_psm2_request->buf);
	  }
	}

	switch (psm2_status.error_code) {
	    case PSM2_OK:
		mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
		    OMPI_SUCCESS;
		break;
	    case PSM2_MQ_TRUNCATION:
		mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
		    MPI_ERR_TRUNCATE;
		break;
	    default:
		mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
                        MPI_ERR_INTERN;
	}

	mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);

    }
    while (1);

 error:
    opal_show_help("help-mtl-psm2.txt",
		   "error polling network", true,
		   psm2_error_get_string(err));
    return 1;
}
示例#6
0
文件: mtl_psm2.c 项目: 00datman/ompi
int
ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
                      size_t nprocs,
                      struct ompi_proc_t** procs)
{
    int i,j;
    int rc;
    psm2_epid_t   *epids_in = NULL;
    int *mask_in = NULL;
    psm2_epid_t	 *epid;
    psm2_epaddr_t *epaddrs_out = NULL;
    psm2_error_t  *errs_out = NULL, err;
    size_t size;
    int proc_errors[PSM2_ERROR_LAST] = { 0 };
    int timeout_in_secs;

    assert(mtl == &ompi_mtl_psm2.super);
    rc = OMPI_ERR_OUT_OF_RESOURCE;

    errs_out = (psm2_error_t *) malloc(nprocs * sizeof(psm2_error_t));
    if (errs_out == NULL) {
	goto bail;
    }
    epids_in = (psm2_epid_t *) malloc(nprocs * sizeof(psm2_epid_t));
    if (epids_in == NULL) {
	goto bail;
    }
    mask_in = (int *) malloc(nprocs * sizeof(int));
    if (mask_in == NULL) {
	goto bail;
    }
    epaddrs_out = (psm2_epaddr_t *) malloc(nprocs * sizeof(psm2_epaddr_t));
    if (epaddrs_out == NULL) {
	goto bail;
    }
    rc = OMPI_SUCCESS;

    /* Get the epids for all the processes from modex */
    for (i = 0; i < (int) nprocs; i++) {
        if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
            /* Already connected: don't connect again */
            mask_in[i] = 0;
            continue;
        }

        OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version,
                        &procs[i]->super.proc_name, (void**)&epid, &size);
	if (rc != OMPI_SUCCESS || size != sizeof(psm2_epid_t)) {
	  return OMPI_ERROR;
	}
	epids_in[i] = *epid;
	mask_in[i] = 1;
    }

    timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs);

    psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_NOP);

    err = psm2_ep_connect(ompi_mtl_psm2.ep,
			 nprocs,
			 epids_in,
			 mask_in,
			 errs_out,
			 epaddrs_out,
			 timeout_in_secs * 1e9);
    if (err) {
	char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err);
	if (errstr == NULL) {
	    opal_output(0, "PSM2 returned unhandled/unknown connect error: %s\n",
			psm2_error_get_string(err));
	}
	for (i = 0; i < (int) nprocs; i++) {
            if (0 == mask_in[i]) {
                    continue;
            }

	    psm2_error_t thiserr = errs_out[i];
	    errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr);
	    if (proc_errors[thiserr] == 0) {
		proc_errors[thiserr] = 1;
		opal_output(0, "PSM2 EP connect error (%s):",
			    errstr ? errstr : "unknown connect error");
		for (j = 0; j < (int) nprocs; j++) {
		  if (errs_out[j] == thiserr) {
                      opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ?
                                  "unknown" : procs[j]->super.proc_hostname);
		  }
		}
		opal_output(0, "\n");
	    }
	}

	rc = OMPI_ERROR;
    }
    else {
	/* Default error handling is enabled, errors will not be returned to
	 * user.  PSM2 prints the error and the offending endpoint's hostname
	 * and exits with -1 */
	psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);

	/* Fill in endpoint data */
	for (i = 0; i < (int) nprocs; i++) {
            if (0 == mask_in[i]) {
                    continue;
            }

            mca_mtl_psm2_endpoint_t *endpoint =
		(mca_mtl_psm2_endpoint_t *) OBJ_NEW(mca_mtl_psm2_endpoint_t);
	    endpoint->peer_epid = epids_in[i];
	    endpoint->peer_addr = epaddrs_out[i];
            procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
	}

	rc = OMPI_SUCCESS;
    }

bail:
    if (epids_in != NULL) {
	free(epids_in);
    }
    if (mask_in != NULL) {
        free(mask_in);
    }
    if (errs_out != NULL) {
	free(errs_out);
    }
    if (epaddrs_out != NULL) {
	free(epaddrs_out);
    }

    return rc;
}
示例#7
0
psm2_error_t
__psm2_ep_connect(psm2_ep_t ep, int num_of_epid, psm2_epid_t const *array_of_epid,
		 int const *array_of_epid_mask,	/* can be NULL */
		 psm2_error_t *array_of_errors, psm2_epaddr_t *array_of_epaddr,
		 int64_t timeout)
{
	psm2_error_t err = PSM2_OK;
	ptl_ctl_t *ptlctl;
	ptl_t *ptl;
	int i, j, dup_idx;
	int num_toconnect = 0;
	int *epid_mask = NULL;
	int *epid_mask_isdupof = NULL;
	char *device;
	uint64_t t_start = get_cycles();
	uint64_t t_left;
	union psmi_envvar_val timeout_intval;

	PSM2_LOG_MSG("entering");
	PSMI_ERR_UNLESS_INITIALIZED(ep);

	PSMI_PLOCK();

	/*
	 * Normally we would lock here, but instead each implemented ptl component
	 * does its own locking.  This is mostly because the ptl components are
	 * ahead of the PSM interface in that they can disconnect their peers.
	 */
	if (ep == NULL || array_of_epaddr == NULL || array_of_epid == NULL ||
	    num_of_epid < 1) {
		err = psmi_handle_error(ep, PSM2_PARAM_ERR,
					"Invalid psm2_ep_connect parameters");
		goto fail;
	}

	/* We need two of these masks to detect duplicates */
	err = PSM2_NO_MEMORY;
	epid_mask =
	    (int *)psmi_malloc(ep, UNDEFINED, sizeof(int) * num_of_epid);
	if (epid_mask == NULL)
		goto fail;
	epid_mask_isdupof =
	    (int *)psmi_malloc(ep, UNDEFINED, sizeof(int) * num_of_epid);
	if (epid_mask_isdupof == NULL)
		goto fail;
	err = PSM2_OK;

	/* Eventually handle timeouts across all connects. */
	for (j = 0; j < num_of_epid; j++) {
		if (array_of_epid_mask != NULL && !array_of_epid_mask[j])
			epid_mask[j] = 0;
		else {
			epid_mask[j] = 1;
			array_of_errors[j] = PSM2_EPID_UNKNOWN;
			array_of_epaddr[j] = NULL;
			num_toconnect++;
		}
		epid_mask_isdupof[j] = -1;
	}

	psmi_getenv("PSM2_CONNECT_TIMEOUT",
		    "End-point connection timeout over-ride. 0 for no time-out.",
		    PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT,
		    (union psmi_envvar_val)0, &timeout_intval);

	if (getenv("PSM2_CONNECT_TIMEOUT")) {
		timeout = timeout_intval.e_uint * SEC_ULL;
	} else if (timeout > 0) {
		/* The timeout parameter provides the minimum timeout. A heuristic
		 * is used to scale up the timeout linearly with the number of
		 * endpoints, and we allow one second per 100 endpoints. */
		timeout = max(timeout, (num_toconnect * SEC_ULL) / 100);
	}

	if (timeout > 0 && timeout < PSMI_MIN_EP_CONNECT_TIMEOUT)
		timeout = PSMI_MIN_EP_CONNECT_TIMEOUT;
	_HFI_PRDBG("Connect to %d endpoints with time-out of %.2f secs\n",
		   num_toconnect, (double)timeout / 1e9);

	/* Look for duplicates in input array */
	for (i = 0; i < num_of_epid; i++) {
		for (j = i + 1; j < num_of_epid; j++) {
			if (array_of_epid[i] == array_of_epid[j] &&
			    epid_mask[i] && epid_mask[j]) {
				epid_mask[j] = 0;	/* don't connect more than once */
				epid_mask_isdupof[j] = i;
			}
		}
	}

	for (i = 0; i < PTL_MAX_INIT; i++) {
		if (ep->devid_enabled[i] == -1)
			continue;
		/* Set up the right connect ptrs */
		switch (ep->devid_enabled[i]) {
		case PTL_DEVID_IPS:
			ptlctl = &ep->ptl_ips;
			ptl = ep->ptl_ips.ptl;
			device = "ips";
			break;
		case PTL_DEVID_AMSH:
			ptlctl = &ep->ptl_amsh;
			ptl = ep->ptl_amsh.ptl;
			device = "amsh";
			break;
		case PTL_DEVID_SELF:
			ptlctl = &ep->ptl_self;
			ptl = ep->ptl_self.ptl;
			device = "self";
			break;
		default:
			device = "unknown";
			ptlctl = &ep->ptl_ips;	/*no-unused */
			ptl = ep->ptl_ips.ptl;	/*no-unused */
			device = "ips";	/*no-unused */
			psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
					  "Unknown/unhandled PTL id %d\n",
					  ep->devid_enabled[i]);
			break;
		}
		t_left = psmi_cycles_left(t_start, timeout);

		_HFI_VDBG("Trying to connect with device %s\n", device);
		if ((err = ptlctl->ep_connect(ptl, num_of_epid, array_of_epid,
					      epid_mask, array_of_errors,
					      array_of_epaddr,
					      cycles_to_nanosecs(t_left)))) {
			_HFI_PRDBG("Connect failure in device %s err=%d\n",
				   device, err);
			goto connect_fail;
		}

		/* Now process what's been connected */
		for (j = 0; j < num_of_epid; j++) {
			dup_idx = epid_mask_isdupof[j];
			if (!epid_mask[j] && dup_idx == -1)
				continue;

			if (dup_idx != -1) {	/* dup */
				array_of_epaddr[j] = array_of_epaddr[dup_idx];
				array_of_errors[j] = array_of_errors[dup_idx];
				epid_mask_isdupof[j] = -1;
			}

			if (array_of_errors[j] == PSM2_OK) {
				epid_mask[j] = 0;	/* don't try on next ptl */
				ep->connections++;
			}
		}
	}

	for (i = 0; i < num_of_epid; i++) {
		ptl_ctl_t *c = NULL;
		if (array_of_epid_mask != NULL && !array_of_epid_mask[i])
			continue;
		/* If we see unreachable here, that means some PTLs were not enabled */
		if (array_of_errors[i] == PSM2_EPID_UNREACHABLE) {
			err = PSM2_EPID_UNREACHABLE;
			break;
		}

		psmi_assert_always(array_of_epaddr[i] != NULL);
		c = array_of_epaddr[i]->ptlctl;
		psmi_assert_always(c != NULL);
		_HFI_VDBG("%-20s DEVICE %s (%p)\n",
			  psmi_epaddr_get_name(array_of_epid[i]),
			  c == &ep->ptl_ips ? "hfi" :
			  (c == &ep->ptl_amsh ? "amsh" : "self"),
			  (void *)array_of_epaddr[i]->ptlctl->ptl);
	}

connect_fail:
	/* If the error is a timeout (at worse) and the client is OPA MPI,
	 * just return timeout to let OPA MPI handle the hostnames that
	 * timed out */
	if (err != PSM2_OK) {
		char errbuf[PSM2_ERRSTRING_MAXLEN];
		size_t len;
		int j = 0;

		if (err == PSM2_EPID_UNREACHABLE) {
			char *deverr = "of an incorrect setting";
			char *eperr = " ";
			char *devname = NULL;
			if (!psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) {
				deverr =
				    "there is no shared memory PSM device (shm)";
				eperr = " shared memory ";
			} else
			    if (!psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
				deverr =
				    "there is no OPA PSM device (hfi)";
				eperr = " OPA ";
			}

			len = snprintf(errbuf, sizeof(errbuf) - 1,
				       "Some%sendpoints could not be connected because %s "
				       "in the currently enabled PSM_DEVICES (",
				       eperr, deverr);
			for (i = 0; i < PTL_MAX_INIT && len < sizeof(errbuf) - 1;
			     i++) {
				switch (ep->devid_enabled[i]) {
				case PTL_DEVID_IPS:
					devname = "hfi";
					break;
				case PTL_DEVID_AMSH:
					devname = "shm";
					break;
				case PTL_DEVID_SELF:
				default:
					devname = "self";
					break;
				}
				len +=
				    snprintf(errbuf + len,
					     sizeof(errbuf) - len - 1, "%s,",
					     devname);
			}
			if (len < sizeof(errbuf) - 1 && devname != NULL)
				/* parsed something, remove trailing comma */
				errbuf[len - 1] = ')';
		} else
			len = snprintf(errbuf, sizeof(errbuf) - 1,
				       "%s", err == PSM2_TIMEOUT ?
				       "Dectected connection timeout" :
				       psm2_error_get_string(err));

		/* first pass, look for all nodes with the error */
		for (i = 0; i < num_of_epid && len < sizeof(errbuf) - 1; i++) {
			if (array_of_epid_mask != NULL
			    && !array_of_epid_mask[i])
				continue;
			if (array_of_errors[i] == PSM2_OK)
				continue;
			if (array_of_errors[i] == PSM2_EPID_UNREACHABLE &&
			    err != PSM2_EPID_UNREACHABLE)
				continue;
			if (err == array_of_errors[i]) {
				len +=
				    snprintf(errbuf + len,
					     sizeof(errbuf) - len - 1, "%c %s",
					     j == 0 ? ':' : ',',
					     psmi_epaddr_get_hostname
					     (array_of_epid[i]));
				j++;
			}
		}
		errbuf[sizeof(errbuf) - 1] = '\0';
		err = psmi_handle_error(ep, err, errbuf);
	}

fail:
	PSMI_PUNLOCK();

	if (epid_mask != NULL)
		psmi_free(epid_mask);
	if (epid_mask_isdupof != NULL)
		psmi_free(epid_mask_isdupof);

	PSM2_LOG_MSG("leaving");
	return err;
}
示例#8
0
static int psmx2_av_connet_eps(struct psmx2_fid_av *av, size_t count,
			       psm2_epid_t *epids, int *mask,
			       psm2_error_t *errors,
			       psm2_epaddr_t *epaddrs,
			       void *context)
{
	int i;
	psm2_epconn_t epconn;
	struct psmx2_epaddr_context *epaddr_context;
	int error_count = 0;

	/* set up mask to prevent connecting to an already connected ep */
	for (i=0; i<count; i++) {
		if (psm2_ep_epid_lookup(epids[i], &epconn) == PSM2_OK) {
			epaddr_context = psm2_epaddr_getctxt(epconn.addr);
			if (epaddr_context && epaddr_context->epid == epids[i])
				epaddrs[i] = epconn.addr;
			else
				mask[i] = 1;
		} else {
			mask[i] = 1;
		}
	}

	psm2_ep_connect(av->domain->psm2_ep, count, epids, mask, errors,
			epaddrs, psmx2_conn_timeout(count));

	for (i=0; i<count; i++){
		if (!mask[i])
			continue;

		if (errors[i] == PSM2_OK ||
		    errors[i] == PSM2_EPID_ALREADY_CONNECTED) {
			psmx2_set_epaddr_context(av->domain, epids[i], epaddrs[i]);
		} else {
			/* If duplicated addrs are passed to psm2_ep_connect(),
			 * all but one will fail with error "Endpoint could not
			 * be reached". This should be treated the same as
			 * "Endpoint already connected".
			 */
			if (psm2_ep_epid_lookup(epids[i], &epconn) == PSM2_OK) {
				epaddr_context = psm2_epaddr_getctxt(epconn.addr);
				if (epaddr_context &&
				    epaddr_context->epid == epids[i]) {
					epaddrs[i] = epconn.addr;
					continue;
				}
			}

			FI_INFO(&psmx2_prov, FI_LOG_AV,
				"%d: psm2_ep_connect returned %s. remote epid=%lx.\n",
				i, psm2_error_get_string(errors[i]), epids[i]);
			if (epids[i] == 0)
				FI_INFO(&psmx2_prov, FI_LOG_AV,
					"does the application depend on the provider"
					"to resolve IP address into endpoint id? if so"
					"check if the name server has started correctly"
					"at the other side.\n");
			epaddrs[i] = (void *)FI_ADDR_NOTAVAIL;
			error_count++;

			if (av->flags & FI_EVENT)
				psmx2_av_post_completion(av, context, i, errors[i]);
		}
	}

	return error_count;
}