int psmx_av_open(struct fid_domain *domain, struct fi_av_attr *attr, struct fid_av **av, void *context) { struct psmx_fid_domain *domain_priv; struct psmx_fid_av *av_priv; int type = FI_AV_MAP; size_t count = 64; domain_priv = container_of(domain, struct psmx_fid_domain, domain); if (attr) { switch (attr->type) { case FI_AV_MAP: case FI_AV_TABLE: type = attr->type; break; default: PSMX_DEBUG("attr->type=%d, supported=%d %d\n", attr->type, FI_AV_MAP, FI_AV_TABLE); return -FI_EINVAL; } count = attr->count; } av_priv = (struct psmx_fid_av *) calloc(1, sizeof *av_priv); if (!av_priv) return -FI_ENOMEM; av_priv->domain = domain_priv; av_priv->type = type; av_priv->addrlen = sizeof(psm_epaddr_t); av_priv->count = count; av_priv->av.fid.fclass = FI_CLASS_AV; av_priv->av.fid.context = context; av_priv->av.fid.ops = &psmx_fi_ops; av_priv->av.ops = &psmx_av_ops; *av = &av_priv->av; return 0; }
int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, struct fid_cntr **cntr, void *context) { struct psmx_fid_domain *domain_priv; struct psmx_fid_cntr *cntr_priv; struct psmx_fid_wait *wait = NULL; struct fi_wait_attr wait_attr; int wait_is_local = 0; int events; uint64_t flags; int err; events = FI_CNTR_EVENTS_COMP; flags = 0; domain_priv = container_of(domain, struct psmx_fid_domain, domain); switch (attr->events) { case FI_CNTR_EVENTS_COMP: events = attr->events; break; default: PSMX_DEBUG("%s: attr->events=%d, supported=%d\n", __func__, attr->events, FI_CNTR_EVENTS_COMP); return -EINVAL; } switch (attr->wait_obj) { case FI_WAIT_NONE: case FI_WAIT_UNSPEC: break; case FI_WAIT_SET: if (!attr->wait_set) { PSMX_DEBUG("%s: FI_WAIT_SET is specified but attr->wait_set is NULL\n", __func__); return -FI_EINVAL; } wait = (struct psmx_fid_wait *)attr->wait_set; break; case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; err = psmx_wait_open(&domain_priv->fabric->fabric, &wait_attr, (struct fid_wait **)&wait); if (err) return err; wait_is_local = 1; break; default: PSMX_DEBUG("%s: attr->wait_obj=%d, supported=%d...%d\n", __func__, attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } cntr_priv = (struct psmx_fid_cntr *) calloc(1, sizeof *cntr_priv); if (!cntr_priv) return -ENOMEM; cntr_priv->domain = domain_priv; cntr_priv->events = events; cntr_priv->wait = wait; cntr_priv->wait_is_local = wait_is_local; cntr_priv->flags = flags; cntr_priv->cntr.fid.fclass = FI_CLASS_CNTR; cntr_priv->cntr.fid.context = context; cntr_priv->cntr.fid.ops = &psmx_fi_ops; cntr_priv->cntr.ops = &psmx_cntr_ops; pthread_mutex_init(&cntr_priv->trigger_lock, NULL); *cntr = &cntr_priv->cntr; return 0; }
/*
 * Insert `count` endpoint ids into the address vector and connect to them.
 *
 * av:      address vector to insert into
 * addr:    array of `count` psm_epid_t values
 * count:   number of entries in `addr`
 * fi_addr: output array.  For a non-table AV it is filled directly with
 *          the connected psm_epaddr_t values.  For FI_AV_TABLE it may be
 *          NULL; when given, it receives the table index of each entry.
 *          Entries that failed to connect are set to FI_ADDR_NOTAVAIL.
 * flags, context: not used by this implementation.
 *
 * Returns the number of entries inserted successfully (count minus the
 * number of failed connections), or -FI_ENOMEM on allocation failure.
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	/*
	 * Same value as the former `30*1e9` double literal, but as an
	 * integer.  Presumably 30 seconds in nanoseconds -- confirm against
	 * the PSM psm_ep_connect() documentation.
	 */
	const int64_t connect_timeout = 30LL * 1000000000LL;
	struct psmx_fid_av *av_priv;
	struct psmx_epaddr_context *epaddr_context;
	psm_error_t *errors;
	fi_addr_t *result = NULL;
	int error_count = 0;
	int *mask;
	size_t i;

	/*
	 * Nothing to insert.  Also avoids relying on the implementation-
	 * defined result of calloc(0, ...), which may be NULL and would
	 * otherwise be reported as -FI_ENOMEM.
	 */
	if (count == 0)
		return 0;

	av_priv = container_of(av, struct psmx_fid_av, av);

	errors = calloc(count, sizeof *errors);
	if (!errors)
		return -FI_ENOMEM;

	mask = calloc(count, sizeof *mask);
	if (!mask) {
		free(errors);
		return -FI_ENOMEM;
	}

	if (av_priv->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_priv, count)) {
			free(mask);
			free(errors);
			return -FI_ENOMEM;
		}

		for (i = 0; i < count; i++)
			av_priv->psm_epids[av_priv->last + i] = ((psm_epid_t *)addr)[i];

		/*
		 * From here on work against the table's own storage; the
		 * caller's output array is remembered in `result` and filled
		 * with table indices at the end.
		 */
		result = fi_addr;
		addr = (const void *)(av_priv->psm_epids + av_priv->last);
		fi_addr = (fi_addr_t *)(av_priv->psm_epaddrs + av_priv->last);
	}

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (i = 0; i < count; i++) {
		psm_epconn_t epconn;

		if (psm_ep_epid_lookup(((psm_epid_t *) addr)[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context &&
			    epaddr_context->epid == ((psm_epid_t *) addr)[i])
				/* already connected: reuse the known epaddr */
				((psm_epaddr_t *) fi_addr)[i] = epconn.addr;
			else
				mask[i] = 1;
		} else {
			mask[i] = 1;
		}
	}

	/* Per-entry outcomes are reported through errors[] and checked below. */
	psm_ep_connect(av_priv->domain->psm_ep, count,
		       (psm_epid_t *) addr, mask, errors,
		       (psm_epaddr_t *) fi_addr, connect_timeout);

	for (i = 0; i < count; i++) {
		/* entries that were not part of the connect request */
		if (!mask[i])
			continue;

		if (errors[i] == PSM_OK || errors[i] == PSM_EPID_ALREADY_CONNECTED) {
			psmx_set_epaddr_context(av_priv->domain,
						((psm_epid_t *) addr)[i],
						((psm_epaddr_t *) fi_addr)[i]);
		} else {
			PSMX_DEBUG("%zu: psm_ep_connect returned %s. remote epid=%lx.\n",
				   i, psm_error_get_string(errors[i]),
				   ((psm_epid_t *)addr)[i]);
			if (((psm_epid_t *)addr)[i] == 0)
				PSMX_DEBUG("does the application depend on the provider "
					   "to resolve IP address into endpoint id? if so, "
					   "check if the name server has started correctly "
					   "at the other side.\n");
			fi_addr[i] = FI_ADDR_NOTAVAIL;
			error_count++;
		}
	}

	free(mask);
	free(errors);

	if (av_priv->type == FI_AV_TABLE) {
		/* NOTE: unresolved addresses are left in the AV table */
		if (result) {
			for (i = 0; i < count; i++) {
				size_t slot = av_priv->last + i;

				if ((fi_addr_t)av_priv->psm_epaddrs[slot] == FI_ADDR_NOTAVAIL)
					result[i] = FI_ADDR_NOTAVAIL;
				else
					result[i] = slot;
			}
		}
		av_priv->last += count;
	}

	return (int)(count - error_count);
}
void psmx_cntr_check_trigger(struct psmx_fid_cntr *cntr) { struct psmx_trigger *trigger; if (!cntr->trigger) return; pthread_mutex_lock(&cntr->trigger_lock); trigger = cntr->trigger; while (trigger) { if (cntr->counter < trigger->threshold) break; cntr->trigger = trigger->next; switch (trigger->op) { case PSMX_TRIGGERED_SEND: _psmx_send(trigger->send.ep, trigger->send.buf, trigger->send.len, trigger->send.desc, trigger->send.dest_addr, trigger->send.context, trigger->send.flags); break; case PSMX_TRIGGERED_RECV: _psmx_recv(trigger->recv.ep, trigger->recv.buf, trigger->recv.len, trigger->recv.desc, trigger->recv.src_addr, trigger->recv.context, trigger->recv.flags); break; case PSMX_TRIGGERED_TSEND: _psmx_tagged_send(trigger->tsend.ep, trigger->tsend.buf, trigger->tsend.len, trigger->tsend.desc, trigger->tsend.dest_addr, trigger->tsend.tag, trigger->tsend.context, trigger->tsend.flags); break; case PSMX_TRIGGERED_TRECV: _psmx_tagged_recv(trigger->trecv.ep, trigger->trecv.buf, trigger->trecv.len, trigger->trecv.desc, trigger->trecv.src_addr, trigger->trecv.tag, trigger->trecv.ignore, trigger->trecv.context, trigger->trecv.flags); break; case PSMX_TRIGGERED_WRITE: _psmx_write(trigger->write.ep, trigger->write.buf, trigger->write.len, trigger->write.desc, trigger->write.dest_addr, trigger->write.addr, trigger->write.key, trigger->write.context, trigger->write.flags, trigger->write.data); break; case PSMX_TRIGGERED_READ: _psmx_read(trigger->read.ep, trigger->read.buf, trigger->read.len, trigger->read.desc, trigger->read.src_addr, trigger->read.addr, trigger->read.key, trigger->read.context, trigger->read.flags); break; case PSMX_TRIGGERED_ATOMIC_WRITE: _psmx_atomic_write(trigger->atomic_write.ep, trigger->atomic_write.buf, trigger->atomic_write.count, trigger->atomic_write.desc, trigger->atomic_write.dest_addr, trigger->atomic_write.addr, trigger->atomic_write.key, trigger->atomic_write.datatype, trigger->atomic_write.atomic_op, trigger->atomic_write.context, 
trigger->atomic_write.flags); break; case PSMX_TRIGGERED_ATOMIC_READWRITE: _psmx_atomic_readwrite(trigger->atomic_readwrite.ep, trigger->atomic_readwrite.buf, trigger->atomic_readwrite.count, trigger->atomic_readwrite.desc, trigger->atomic_readwrite.result, trigger->atomic_readwrite.result_desc, trigger->atomic_readwrite.dest_addr, trigger->atomic_readwrite.addr, trigger->atomic_readwrite.key, trigger->atomic_readwrite.datatype, trigger->atomic_readwrite.atomic_op, trigger->atomic_readwrite.context, trigger->atomic_readwrite.flags); break; case PSMX_TRIGGERED_ATOMIC_COMPWRITE: _psmx_atomic_compwrite(trigger->atomic_compwrite.ep, trigger->atomic_compwrite.buf, trigger->atomic_compwrite.count, trigger->atomic_compwrite.desc, trigger->atomic_compwrite.compare, trigger->atomic_compwrite.compare_desc, trigger->atomic_compwrite.result, trigger->atomic_compwrite.result_desc, trigger->atomic_compwrite.dest_addr, trigger->atomic_compwrite.addr, trigger->atomic_compwrite.key, trigger->atomic_compwrite.datatype, trigger->atomic_compwrite.atomic_op, trigger->atomic_compwrite.context, trigger->atomic_compwrite.flags); break; default: PSMX_DEBUG("%s: %d unsupported op\n", __func__, trigger->op); break; } free(trigger); } pthread_mutex_unlock(&cntr->trigger_lock); }