/*
 * Translate a PSM endpoint id into a PSM endpoint address.
 *
 * domain: provider domain whose psm_ep is used if a connection is needed.
 * epid:   endpoint id to resolve.
 * epaddr: output; receives the endpoint address on success.
 *
 * Returns 0 on success, or psmx_errno() of the psm_ep_connect() result when
 * the connection attempt does not return PSM_OK.
 */
int psmx_epid_to_epaddr(struct psmx_fid_domain *domain,
			psm_epid_t epid, psm_epaddr_t *epaddr)
{
	struct psmx_epaddr_context *ctxt;
	psm_epconn_t conn;
	psm_error_t connect_err;
	int rc;

	/*
	 * Fast path: the epid is already known to PSM and its address carries
	 * a context recorded for this very epid, so reuse that address.
	 */
	rc = psm_ep_epid_lookup(epid, &conn);
	if (rc == PSM_OK) {
		ctxt = psm_epaddr_getctxt(conn.addr);
		if (ctxt != NULL && ctxt->epid == epid) {
			*epaddr = conn.addr;
			return 0;
		}
	}

	/*
	 * Slow path: connect to the single epid.
	 * NOTE(review): 30*1e9 is presumably a 30 second timeout expressed in
	 * nanoseconds -- confirm against the PSM API documentation.
	 */
	rc = psm_ep_connect(domain->psm_ep, 1, &epid, NULL, &connect_err,
			    epaddr, 30*1e9);
	if (rc != PSM_OK)
		return psmx_errno(rc);

	/* Record the context for the newly obtained address. */
	psmx_set_epaddr_context(domain, epid, *epaddr);
	return 0;
}
/*
 * Insert `count` PSM endpoint ids into an address vector, connecting to any
 * endpoint that is not already connected.
 *
 * av:      address vector (embedded in a struct psmx_fid_av).
 * addr:    array of `count` psm_epid_t values; must be non-NULL if count != 0.
 * count:   number of addresses.
 * fi_addr: output array.  For a non-table AV each slot receives the PSM
 *          endpoint address (stored through a psm_epaddr_t pointer) or
 *          FI_ADDR_NOTAVAIL on failure.  For FI_AV_TABLE it may be NULL; when
 *          given, each slot receives the table index or FI_ADDR_NOTAVAIL.
 * flags:   only FI_SYNC_ERR is examined, and only when the AV was not opened
 *          with FI_EVENT; `context` is then written as an int array of
 *          per-address error codes.
 * context: passed to psmx_av_post_completion() when the AV has FI_EVENT set;
 *          otherwise see `flags`.
 *
 * Returns:
 *   -FI_EINVAL  addr is NULL while count is non-zero
 *   -FI_ENOEQ   the AV has FI_EVENT set but no event queue
 *   -FI_ENOMEM  allocation failed or psmx_av_check_table_size() failed
 *   0           FI_EVENT mode, after posting the completion
 *   otherwise   number of addresses inserted successfully
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct psmx_fid_av *av_priv;
	psm_error_t *errors;
	int error_count = 0;
	int *mask;
	int i, j, ret;
	fi_addr_t *result = NULL;
	struct psmx_epaddr_context *epaddr_context;
	psm_epid_t *epids;
	psm_epaddr_t *epaddrs;
	psm_epconn_t epconn;

	if (count && !addr) {
		FI_INFO(&psmx_prov, FI_LOG_AV,
			"the input address array is NULL.\n");
		return -FI_EINVAL;
	}

	av_priv = container_of(av, struct psmx_fid_av, av);

	if ((av_priv->flags & FI_EVENT) && !av_priv->eq)
		return -FI_ENOEQ;

	errors = calloc(count, sizeof *errors);
	if (!errors)
		return -FI_ENOMEM;

	/* mask[i] != 0 marks the entries psm_ep_connect() should process */
	mask = calloc(count, sizeof *mask);
	if (!mask) {
		free(errors);
		return -FI_ENOMEM;
	}

	if (av_priv->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_priv, count)) {
			free(mask);
			free(errors);
			return -FI_ENOMEM;
		}

		for (i = 0; i < count; i++)
			av_priv->psm_epids[av_priv->last + i] =
				((psm_epid_t *) addr)[i];

		/*
		 * From here on work directly on the table storage; the
		 * caller's output array is remembered in `result`.
		 */
		result = fi_addr;
		addr = (const void *)(av_priv->psm_epids + av_priv->last);
		fi_addr = (fi_addr_t *)(av_priv->psm_epaddrs + av_priv->last);
	}

	/* Typed views of the (possibly redirected) input/output arrays. */
	epids = (psm_epid_t *) addr;
	epaddrs = (psm_epaddr_t *) fi_addr;

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (i = 0; i < count; i++) {
		if (psm_ep_epid_lookup(epids[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context && epaddr_context->epid == epids[i])
				epaddrs[i] = epconn.addr;
			else
				mask[i] = 1;
		} else {
			mask[i] = 1;
		}
	}

	/*
	 * The aggregate return value is intentionally not used; the
	 * per-address results in `errors` are examined below.
	 */
	psm_ep_connect(av_priv->domain->psm_ep, count, epids, mask, errors,
		       epaddrs, 30*1e9);

	for (i = 0; i < count; i++) {
		if (!mask[i]) {
			/* already connected before this call */
			errors[i] = PSM_OK;
			continue;
		}

		if (errors[i] == PSM_OK ||
		    errors[i] == PSM_EPID_ALREADY_CONNECTED) {
			psmx_set_epaddr_context(av_priv->domain, epids[i],
						epaddrs[i]);
			errors[i] = PSM_OK;
			continue;
		}

		/* If duplicated addresses are passed to psm_ep_connect(),
		 * all but one will fail with error "Endpoint could not be
		 * reached". They should be treated as already connected.
		 */
		if (psm_ep_epid_lookup(epids[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context &&
			    epaddr_context->epid == epids[i]) {
				epaddrs[i] = epconn.addr;
				errors[i] = PSM_OK;
				continue;
			}
		}

		FI_INFO(&psmx_prov, FI_LOG_AV,
			"%d: psm_ep_connect returned %s. remote epid=%lx.\n",
			i, psm_error_get_string(errors[i]), epids[i]);
		/* Adjacent literals need explicit separating spaces. */
		if (epids[i] == 0)
			FI_INFO(&psmx_prov, FI_LOG_AV,
				"does the application depend on the provider "
				"to resolve IP address into endpoint id? if so "
				"check if the name server has started correctly "
				"at the other side.\n");
		fi_addr[i] = FI_ADDR_NOTAVAIL;
		error_count++;

		if (av_priv->flags & FI_EVENT)
			psmx_av_post_completion(av_priv, context, i, errors[i]);
	}

	if (av_priv->type == FI_AV_TABLE) {
		/* NOTE: unresolved addresses are left in the AV table */
		if (result) {
			for (i = 0; i < count; i++) {
				j = av_priv->last + i;
				if ((fi_addr_t)av_priv->psm_epaddrs[j] == FI_ADDR_NOTAVAIL)
					result[i] = FI_ADDR_NOTAVAIL;
				else
					result[i] = j;
			}
		}
		av_priv->last += count;
	}

	if (av_priv->flags & FI_EVENT) {
		psmx_av_post_completion(av_priv, context, count - error_count, 0);
		ret = 0;
	} else {
		if (flags & FI_SYNC_ERR) {
			/* context doubles as the per-address error array */
			int *fi_errors = context;
			for (i = 0; i < count; i++)
				fi_errors[i] = psmx_errno(errors[i]);
		}
		ret = count - error_count;
	}

	free(mask);
	free(errors);
	return ret;
}
/*
 * Insert `count` PSM endpoint ids into an address vector, connecting to any
 * endpoint that is not already connected.
 *
 * av:      address vector (embedded in a struct psmx_fid_av).
 * addr:    array of `count` psm_epid_t values.
 * count:   number of addresses.
 * fi_addr: output array.  For a non-table AV each slot receives the PSM
 *          endpoint address (stored through a psm_epaddr_t pointer) or
 *          FI_ADDR_NOTAVAIL on failure.  For FI_AV_TABLE it may be NULL; when
 *          given, each slot receives the table index or FI_ADDR_NOTAVAIL.
 * flags:   not used by this implementation.
 * context: not used by this implementation.
 *
 * Returns the number of addresses inserted successfully, or -FI_ENOMEM when
 * an allocation or psmx_av_check_table_size() fails.
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct psmx_fid_av *av_priv;
	psm_error_t *errors;
	int error_count = 0;
	int *mask;
	int i, j;
	fi_addr_t *result = NULL;
	struct psmx_epaddr_context *epaddr_context;
	psm_epid_t *epids;
	psm_epaddr_t *epaddrs;
	psm_epconn_t epconn;

	av_priv = container_of(av, struct psmx_fid_av, av);

	errors = calloc(count, sizeof *errors);
	if (!errors)
		return -FI_ENOMEM;

	/* mask[i] != 0 marks the entries psm_ep_connect() should process */
	mask = calloc(count, sizeof *mask);
	if (!mask) {
		free(errors);
		return -FI_ENOMEM;
	}

	if (av_priv->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_priv, count)) {
			free(mask);
			free(errors);
			return -FI_ENOMEM;
		}

		for (i = 0; i < count; i++)
			av_priv->psm_epids[av_priv->last + i] =
				((psm_epid_t *) addr)[i];

		/*
		 * From here on work directly on the table storage; the
		 * caller's output array is remembered in `result`.
		 */
		result = fi_addr;
		addr = (const void *)(av_priv->psm_epids + av_priv->last);
		fi_addr = (fi_addr_t *)(av_priv->psm_epaddrs + av_priv->last);
	}

	/* Typed views of the (possibly redirected) input/output arrays. */
	epids = (psm_epid_t *) addr;
	epaddrs = (psm_epaddr_t *) fi_addr;

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (i = 0; i < count; i++) {
		if (psm_ep_epid_lookup(epids[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context && epaddr_context->epid == epids[i])
				epaddrs[i] = epconn.addr;
			else
				mask[i] = 1;
		} else {
			mask[i] = 1;
		}
	}

	/*
	 * The aggregate return value is intentionally not used; the
	 * per-address results in `errors` are examined below.
	 */
	psm_ep_connect(av_priv->domain->psm_ep, count, epids, mask, errors,
		       epaddrs, 30*1e9);

	for (i = 0; i < count; i++) {
		if (!mask[i])
			continue;

		if (errors[i] == PSM_OK ||
		    errors[i] == PSM_EPID_ALREADY_CONNECTED) {
			psmx_set_epaddr_context(av_priv->domain, epids[i],
						epaddrs[i]);
			continue;
		}

		/*
		 * An epid that appears more than once in the same batch can
		 * be reported as failed by psm_ep_connect() even though one
		 * of its occurrences connected.  If the epid now resolves to
		 * an address whose context matches, treat it as connected
		 * instead of reporting an error.
		 */
		if (psm_ep_epid_lookup(epids[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context &&
			    epaddr_context->epid == epids[i]) {
				epaddrs[i] = epconn.addr;
				continue;
			}
		}

		FI_INFO(&psmx_prov, FI_LOG_AV,
			"%d: psm_ep_connect returned %s. remote epid=%lx.\n",
			i, psm_error_get_string(errors[i]), epids[i]);
		/* Adjacent literals need explicit separating spaces. */
		if (epids[i] == 0)
			FI_INFO(&psmx_prov, FI_LOG_AV,
				"does the application depend on the provider "
				"to resolve IP address into endpoint id? if so "
				"check if the name server has started correctly "
				"at the other side.\n");
		fi_addr[i] = FI_ADDR_NOTAVAIL;
		error_count++;
	}

	free(mask);
	free(errors);

	if (av_priv->type == FI_AV_TABLE) {
		/* NOTE: unresolved addresses are left in the AV table */
		if (result) {
			for (i = 0; i < count; i++) {
				j = av_priv->last + i;
				if ((fi_addr_t)av_priv->psm_epaddrs[j] == FI_ADDR_NOTAVAIL)
					result[i] = FI_ADDR_NOTAVAIL;
				else
					result[i] = j;
			}
		}
		av_priv->last += count;
	}

	return count - error_count;
}
/*
 * Insert `count` PSM endpoint ids into an address vector, connecting to any
 * endpoint that is not already connected.
 *
 * av:      address vector (embedded in a struct psmx_fid_av).
 * addr:    array of `count` psm_epid_t values.
 * count:   number of addresses.
 * fi_addr: output array.  For a non-table AV each slot receives the PSM
 *          endpoint address (stored through a psm_epaddr_t pointer).  For
 *          FI_AV_TABLE it may be NULL; when given, each slot receives the
 *          table index of the corresponding entry.
 * flags:   must be zero.
 *
 * Returns -FI_EBADFLAGS for non-zero flags, -ENOMEM when an allocation or
 * psmx_av_check_table_size() fails, otherwise psmx_errno() applied to the
 * result of psm_ep_connect().
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags)
{
	struct psmx_fid_av *av_impl = container_of(av, struct psmx_fid_av, av);
	struct psmx_epaddr_context *ctxt;
	psm_error_t *conn_errors;
	psm_epid_t *epids;
	psm_epaddr_t *epaddrs;
	psm_epconn_t conn;
	fi_addr_t *user_out = NULL;
	int *need_connect;
	int rc;
	int n;

	/* TODO: support the FI_RANGE flag */
	if (flags != 0)
		return -FI_EBADFLAGS;

	conn_errors = (psm_error_t *) calloc(count, sizeof *conn_errors);
	if (conn_errors == NULL)
		return -ENOMEM;

	need_connect = (int *) calloc(count, sizeof *need_connect);
	if (need_connect == NULL) {
		free(conn_errors);
		return -ENOMEM;
	}

	if (av_impl->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_impl, count)) {
			free(need_connect);
			free(conn_errors);
			return -ENOMEM;
		}

		for (n = 0; n < count; n++)
			av_impl->psm_epids[av_impl->last + n] =
				((psm_epid_t *) addr)[n];

		/*
		 * Redirect input and output to the table storage and keep
		 * the caller's output array aside for the index fill below.
		 */
		user_out = fi_addr;
		addr = (const void *)(av_impl->psm_epids + av_impl->last);
		fi_addr = (fi_addr_t *)(av_impl->psm_epaddrs + av_impl->last);
	}

	/* Typed views of the (possibly redirected) input/output arrays. */
	epids = (psm_epid_t *) addr;
	epaddrs = (psm_epaddr_t *) fi_addr;

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (n = 0; n < count; n++) {
		need_connect[n] = 1;

		if (psm_ep_epid_lookup(epids[n], &conn) != PSM_OK)
			continue;

		ctxt = psm_epaddr_getctxt(conn.addr);
		if (ctxt != NULL && ctxt->epid == epids[n]) {
			/* known endpoint: reuse its address, skip connect */
			epaddrs[n] = conn.addr;
			need_connect[n] = 0;
		}
	}

	rc = psm_ep_connect(av_impl->domain->psm_ep, count, epids,
			    need_connect, conn_errors, epaddrs, 30*1e9);

	/* Record a context only for entries connected by this call. */
	for (n = 0; n < count; n++) {
		if (!need_connect[n] || conn_errors[n] != PSM_OK)
			continue;
		psmx_set_epaddr_context(av_impl->domain, epids[n], epaddrs[n]);
	}

	free(need_connect);
	free(conn_errors);

	if (av_impl->type == FI_AV_TABLE) {
		if (user_out != NULL) {
			for (n = 0; n < count; n++)
				user_out[n] = av_impl->last + n;
		}
		av_impl->last += count;
	}

	return psmx_errno(rc);
}