int psmx_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { struct psmx_fid_ep *fid_ep; fid_ep = (struct psmx_fid_ep *) calloc(1, sizeof *fid_ep); if (!fid_ep) return -ENOMEM; fid_ep->ep.fid.fclass = FID_CLASS_EP; fid_ep->ep.fid.context = context; fid_ep->ep.fid.ops = &psmx_fi_ops; fid_ep->ep.ops = &psmx_ep_ops; fid_ep->ep.cm = &psmx_cm_ops; fid_ep->ep.tagged = &psmx_tagged_ops; PSMX_CTXT_TYPE(&fid_ep->nocomp_send_context) = PSMX_NOCOMP_SEND_CONTEXT; PSMX_CTXT_EP(&fid_ep->nocomp_send_context) = fid_ep; PSMX_CTXT_TYPE(&fid_ep->nocomp_recv_context) = PSMX_NOCOMP_RECV_CONTEXT; PSMX_CTXT_EP(&fid_ep->nocomp_recv_context) = fid_ep; PSMX_CTXT_TYPE(&fid_ep->sendimm_context) = PSMX_INJECT_CONTEXT; PSMX_CTXT_EP(&fid_ep->sendimm_context) = fid_ep; PSMX_CTXT_TYPE(&fid_ep->writeimm_context) = PSMX_INJECT_WRITE_CONTEXT; PSMX_CTXT_EP(&fid_ep->writeimm_context) = fid_ep; if (info) { fid_ep->flags = info->op_flags; if (info->ep_cap & FI_MSG) { fid_ep->ep.msg = &psmx_msg_ops; } } *ep = &fid_ep->ep; return 0; }
/*
 * Tagged send specialization that takes no per-call flags and uses
 * dest_addr directly as the PSM endpoint address.
 *
 * The caller's context is overwritten with the operation type
 * (PSMX_TSEND_CONTEXT), the user buffer and the endpoint, and on success
 * also with the PSM request handle.  The domain's reserved tag bits are
 * cleared from tag before it is handed to PSM.  desc is not used.
 *
 * Returns 0 on success, otherwise the PSM error translated by psmx_errno().
 */
ssize_t psmx_tagged_send_no_flag_av_map(struct fid_ep *ep, const void *buf,
					size_t len, void *desc,
					fi_addr_t dest_addr, uint64_t tag,
					void *context)
{
	struct psmx_fid_ep *self = container_of(ep, struct psmx_fid_ep, ep);
	struct fi_context *op_ctx = context;
	psm_epaddr_t peer = (psm_epaddr_t) dest_addr;
	uint64_t wire_tag = tag & (~self->domain->reserved_tag_bits);
	psm_mq_req_t request;
	int rc;

	/* Stamp the caller's context before the operation is posted. */
	PSMX_CTXT_TYPE(op_ctx) = PSMX_TSEND_CONTEXT;
	PSMX_CTXT_USER(op_ctx) = (void *)buf;
	PSMX_CTXT_EP(op_ctx) = self;

	rc = psm_mq_isend(self->domain->psm_mq, peer,
			  0, /* flags */
			  wire_tag, buf, len,
			  (void *)op_ctx, &request);
	if (rc != PSM_OK)
		return psmx_errno(rc);

	PSMX_CTXT_REQ(op_ctx) = request;
	return 0;
}
/*
 * Tagged receive specialization that takes no per-call flags.
 *
 * The match tag is tag with the domain's reserved bits cleared; the tag
 * selector is the complement of ignore with the reserved bits forced on.
 * The caller's context is overwritten with the operation type
 * (PSMX_TRECV_CONTEXT), the user buffer and the endpoint, and on success
 * also with the PSM request handle.  desc and src_addr are not used.
 *
 * Returns 0 on success, otherwise the PSM error translated by psmx_errno().
 */
ssize_t psmx_tagged_recv_no_flag_av_table(struct fid_ep *ep, void *buf,
					  size_t len, void *desc,
					  fi_addr_t src_addr,
					  uint64_t tag, uint64_t ignore,
					  void *context)
{
	struct psmx_fid_ep *self = container_of(ep, struct psmx_fid_ep, ep);
	struct fi_context *op_ctx = context;
	uint64_t match_tag = tag & (~self->domain->reserved_tag_bits);
	uint64_t match_sel = (~ignore) | self->domain->reserved_tag_bits;
	psm_mq_req_t request;
	int rc;

	/* Stamp the caller's context before the operation is posted. */
	PSMX_CTXT_TYPE(op_ctx) = PSMX_TRECV_CONTEXT;
	PSMX_CTXT_USER(op_ctx) = buf;
	PSMX_CTXT_EP(op_ctx) = self;

	rc = psm_mq_irecv(self->domain->psm_mq,
			  match_tag, match_sel,
			  0, /* flags */
			  buf, len,
			  (void *)op_ctx, &request);
	if (rc != PSM_OK)
		return psmx_errno(rc);

	PSMX_CTXT_REQ(op_ctx) = request;
	return 0;
}
/*
 * Tagged receive specialization that takes no per-call flags and resolves
 * src_addr as an index into the endpoint's address table.
 *
 * The match tag is tag with the domain's reserved bits cleared; the tag
 * selector is the complement of ignore with the reserved bits forced on.
 * The caller's context is overwritten with the operation type
 * (PSMX_TRECV_CONTEXT), the user buffer and the endpoint, and on success
 * also with the PSM request handle.  desc is not used.
 *
 * When built with PSM_VERNO_MAJOR >= 2 and the endpoint has
 * FI_DIRECTED_RECV, a src_addr other than FI_ADDR_UNSPEC restricts the
 * receive to that table entry; an index at or beyond av->last yields
 * -FI_EINVAL (after the context has already been stamped).  Otherwise the
 * receive is posted without a source filter.  With older PSM versions
 * src_addr is ignored.
 *
 * Returns 0 on success, otherwise the PSM error translated by psmx_errno().
 */
ssize_t psmx_tagged_recv_no_flag_av_table(struct fid_ep *ep, void *buf,
					  size_t len, void *desc,
					  fi_addr_t src_addr,
					  uint64_t tag, uint64_t ignore,
					  void *context)
{
	struct psmx_fid_ep *self = container_of(ep, struct psmx_fid_ep, ep);
	struct fi_context *op_ctx = context;
	uint64_t match_tag = tag & (~self->domain->reserved_tag_bits);
	uint64_t match_sel = (~ignore) | self->domain->reserved_tag_bits;
	psm_mq_req_t request;
	int rc;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t match_tag2, match_sel2;
	psm_epaddr_t source = NULL;	/* NULL: no source filter */
#endif

	/* Stamp the caller's context before the operation is posted. */
	PSMX_CTXT_TYPE(op_ctx) = PSMX_TRECV_CONTEXT;
	PSMX_CTXT_USER(op_ctx) = buf;
	PSMX_CTXT_EP(op_ctx) = self;

#if (PSM_VERNO_MAJOR >= 2)
	if ((self->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		struct psmx_fid_av *table = self->av;
		size_t slot = (size_t)src_addr;

		if (slot >= table->last)
			return -FI_EINVAL;

		source = table->psm_epaddrs[slot];
	}

	PSMX_SET_TAG(match_tag2, match_tag, 0);
	PSMX_SET_TAG(match_sel2, match_sel, 0);

	rc = psm_mq_irecv2(self->domain->psm_mq, source,
			   &match_tag2, &match_sel2,
			   0, /* flags */
			   buf, len,
			   (void *)op_ctx, &request);
#else
	rc = psm_mq_irecv(self->domain->psm_mq,
			  match_tag, match_sel,
			  0, /* flags */
			  buf, len,
			  (void *)op_ctx, &request);
#endif
	if (rc != PSM_OK)
		return psmx_errno(rc);

	PSMX_CTXT_REQ(op_ctx) = request;
	return 0;
}
/*
 * Cancel a previously posted receive identified by its fi_context.
 *
 * Only contexts typed PSMX_TRECV_CONTEXT, PSMX_RECV_CONTEXT and
 * PSMX_MULTI_RECV_CONTEXT can be cancelled; any other type yields
 * -FI_EOPNOTSUPP.  After PSM accepts the cancel, the request is reaped with
 * psm_mq_test() and, if the endpoint has a receive CQ, an event carrying
 * -FI_ECANCELED is queued on it.
 *
 * Returns:
 *   -FI_EBADF      the endpoint has no domain
 *   -FI_EINVAL     context is NULL
 *   -FI_EOPNOTSUPP the context type is not a cancellable receive
 *   -FI_ENOMEM     the cancellation event could not be created
 *   otherwise the status of the last PSM call translated by psmx_errno().
 */
static ssize_t psmx_ep_cancel(fid_t fid, void *context)
{
	struct psmx_fid_ep *ep_priv =
		container_of(fid, struct psmx_fid_ep, ep.fid);
	struct fi_context *op_ctx = context;
	struct psmx_cq_event *cancel_event;
	psm_mq_req_t *pending;
	psm_mq_status_t status;
	uint64_t cq_flags;
	int rc;

	if (!ep_priv->domain)
		return -FI_EBADF;

	if (!op_ctx)
		return -FI_EINVAL;

	/* Pick the completion flags that match the kind of receive. */
	switch (PSMX_CTXT_TYPE(op_ctx)) {
	case PSMX_TRECV_CONTEXT:
		cq_flags = FI_RECV | FI_TAGGED;
		break;

	case PSMX_RECV_CONTEXT:
	case PSMX_MULTI_RECV_CONTEXT:
		cq_flags = FI_RECV | FI_MSG;
		break;

	default:
		return -FI_EOPNOTSUPP;
	}

	/* The PSM request handle lives inside the context itself. */
	pending = (psm_mq_req_t *)&PSMX_CTXT_REQ(op_ctx);

	rc = psm_mq_cancel(pending);
	if (rc != PSM_OK)
		return psmx_errno(rc);

	rc = psm_mq_test(pending, &status);
	if (rc != PSM_OK || !ep_priv->recv_cq)
		return psmx_errno(rc);

	cancel_event = psmx_cq_create_event(
				ep_priv->recv_cq,
				status.context,
				NULL,	/* buf */
				cq_flags,
				0,	/* len */
				0,	/* data */
				0,	/* tag */
				0,	/* olen */
				-FI_ECANCELED);
	if (!cancel_event)
		return -FI_ENOMEM;

	psmx_cq_enqueue_event(ep_priv->recv_cq, cancel_event);
	return psmx_errno(rc);
}
/*
 * Tagged send specialization that takes no per-call flags and resolves
 * dest_addr as an index into the endpoint's address table.
 *
 * An index at or beyond av->last yields -FI_EINVAL before anything else is
 * touched.  The caller's context is overwritten with the operation type
 * (PSMX_TSEND_CONTEXT), the user buffer and the endpoint, and on success
 * also with the PSM request handle.  The domain's reserved tag bits are
 * cleared from tag before it is handed to PSM.  desc is not used.
 *
 * Builds with PSM_VERNO_MAJOR >= 2 post the send through psm_mq_isend2();
 * older builds use psm_mq_isend().
 *
 * Returns 0 on success, otherwise the PSM error translated by psmx_errno().
 */
ssize_t psmx_tagged_send_no_flag_av_table(struct fid_ep *ep, const void *buf,
					  size_t len, void *desc,
					  fi_addr_t dest_addr, uint64_t tag,
					  void *context)
{
	struct psmx_fid_ep *self = container_of(ep, struct psmx_fid_ep, ep);
	struct psmx_fid_av *table = self->av;
	size_t slot = (size_t)dest_addr;
	struct fi_context *op_ctx;
	psm_epaddr_t peer;
	psm_mq_req_t request;
	uint64_t wire_tag;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t wire_tag2;
#endif
	int rc;

	if (slot >= table->last)
		return -FI_EINVAL;

	peer = table->psm_epaddrs[slot];
	wire_tag = tag & (~self->domain->reserved_tag_bits);
#if (PSM_VERNO_MAJOR >= 2)
	PSMX_SET_TAG(wire_tag2, wire_tag, 0);
#endif

	/* Stamp the caller's context before the operation is posted. */
	op_ctx = context;
	PSMX_CTXT_TYPE(op_ctx) = PSMX_TSEND_CONTEXT;
	PSMX_CTXT_USER(op_ctx) = (void *)buf;
	PSMX_CTXT_EP(op_ctx) = self;

#if (PSM_VERNO_MAJOR >= 2)
	rc = psm_mq_isend2(self->domain->psm_mq, peer,
			   0, /* flags */
			   &wire_tag2, buf, len,
			   (void *)op_ctx, &request);
#else
	rc = psm_mq_isend(self->domain->psm_mq, peer,
			  0, /* flags */
			  wire_tag, buf, len,
			  (void *)op_ctx, &request);
#endif
	if (rc != PSM_OK)
		return psmx_errno(rc);

	PSMX_CTXT_REQ(op_ctx) = request;
	return 0;
}
/*
 * Active-message handler for the RMA protocol (requests and replies).
 *
 * Message layout, as decoded below:
 *   args[0].u32w0  command (masked with PSMX_AM_OP_MASK) plus the
 *                  PSMX_AM_EOM and PSMX_AM_DATA flag bits
 *   args[0].u32w1  a length on requests, the operation status on replies
 *   args[1].u64    echoed unchanged into every reply; the reply cases
 *                  interpret it as a struct psmx_am_request pointer
 *   args[2].u64    target address (requests) / buffer offset (read reply)
 *   args[3].u64    memory-region key (requests)
 *   args[4..5]     command specific (offset, context, immediate data)
 *
 * src/len describe the payload attached to the active message itself.
 * For the *_LONG requests that payload is not the data being transferred,
 * so the target region is validated against the advertised length
 * (args[0].u32w1), not against len.  nargs is not used.
 *
 * Returns 0, a negative errno value (-ENOMEM, -EINVAL for an unknown
 * command), or the status returned by psm_am_reply_short().
 */
int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr,
			psm_amarg_t *args, int nargs, void *src, uint32_t len)
{
	psm_amarg_t rep_args[8];
	void *rma_addr;
	ssize_t rma_len;
	uint64_t key;
	int err = 0;
	int op_error = 0;
	int cmd, eom, has_data;
	struct psmx_am_request *req;
	struct psmx_cq_event *event;
	int chunk_size;
	uint64_t offset;
	struct psmx_fid_mr *mr;

	cmd = args[0].u32w0 & PSMX_AM_OP_MASK;
	eom = args[0].u32w0 & PSMX_AM_EOM;
	has_data = args[0].u32w0 & PSMX_AM_DATA;

	switch (cmd) {
	case PSMX_AM_REQ_WRITE:
		/* One chunk of a write; the data is the AM payload (src/len). */
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx_mr_hash_get(key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)(uintptr_t)rma_addr,
					 len, FI_REMOTE_WRITE) :
			-EINVAL;
		if (!op_error) {
			rma_addr = (char *)rma_addr + mr->offset;
			memcpy(rma_addr, src, len);
			if (eom) {
				if (mr->cq) {
					/* TODO: report the addr/len of the whole write */
					event = psmx_cq_create_event(
							mr->cq,
							0, /* context */
							rma_addr,
							0, /* flags */
							rma_len,
							has_data ? args[4].u64 : 0,
							0, /* tag */
							0, /* olen */
							0);
					if (event)
						psmx_cq_enqueue_event(mr->cq, event);
					else
						err = -ENOMEM;
				}

				if (mr->cntr)
					psmx_cntr_inc(mr->cntr);

				if (mr->domain->rma_ep->remote_write_cntr)
					psmx_cntr_inc(mr->domain->rma_ep->remote_write_cntr);
			}
		}
		/*
		 * Reply on the last chunk or as soon as a chunk fails.  Note
		 * that the reply status replaces any -ENOMEM recorded above.
		 */
		if (eom || op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
						 rep_args, 2, NULL, 0, 0,
						 NULL, NULL);
		}
		break;

	case PSMX_AM_REQ_WRITE_LONG:
		/*
		 * Announcement of a large write.  On success the request is
		 * only queued here via psmx_am_enqueue_rma().
		 */
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx_mr_hash_get(key);
		/* Validate the full advertised length, not the AM payload size. */
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)(uintptr_t)rma_addr,
					 rma_len, FI_REMOTE_WRITE) :
			-EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
						 rep_args, 2, NULL, 0, 0,
						 NULL, NULL);
			break;
		}

		rma_addr = (char *)rma_addr + mr->offset;

		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -ENOMEM;
		} else {
			req->op = args[0].u32w0;
			req->write.addr = (uint64_t)(uintptr_t)rma_addr;
			req->write.len = rma_len;
			req->write.key = key;
			req->write.context = (void *)(uintptr_t)args[4].u64;
			req->write.data = has_data ? args[5].u64 : 0;
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_REMOTE_WRITE_CONTEXT;
			PSMX_CTXT_USER(&req->fi_context) = mr;
			psmx_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX_AM_REQ_READ:
		/* One chunk of a read; the data goes back as the reply payload. */
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		offset = args[4].u64;
		mr = psmx_mr_hash_get(key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)(uintptr_t)rma_addr,
					 rma_len, FI_REMOTE_READ) :
			-EINVAL;
		if (!op_error) {
			rma_addr = (char *)rma_addr + mr->offset;
		} else {
			/* Send an empty payload together with the error. */
			rma_addr = NULL;
			rma_len = 0;
		}

		chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_reply_short);
		assert(rma_len <= chunk_size);

		rep_args[0].u32w0 = PSMX_AM_REP_READ | eom;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		rep_args[2].u64 = offset;
		err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
					 rep_args, 3, rma_addr, rma_len, 0,
					 NULL, NULL);

		if (eom && !op_error) {
			if (mr->domain->rma_ep->remote_read_cntr)
				psmx_cntr_inc(mr->domain->rma_ep->remote_read_cntr);
		}
		break;

	case PSMX_AM_REQ_READ_LONG:
		/*
		 * Request for a large read.  On success the request is only
		 * queued here via psmx_am_enqueue_rma().
		 */
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx_mr_hash_get(key);
		/*
		 * A read must be checked against read access, and against the
		 * full advertised length rather than the AM payload size.
		 */
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)(uintptr_t)rma_addr,
					 rma_len, FI_REMOTE_READ) :
			-EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_READ | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			rep_args[2].u64 = 0;
			err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
						 rep_args, 3, NULL, 0, 0,
						 NULL, NULL);
			break;
		}

		rma_addr = (char *)rma_addr + mr->offset;

		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -ENOMEM;
		} else {
			req->op = args[0].u32w0;
			req->read.addr = (uint64_t)(uintptr_t)rma_addr;
			req->read.len = rma_len;
			req->read.key = key;
			req->read.context = (void *)(uintptr_t)args[4].u64;
			req->read.peer_addr = (void *)epaddr;
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_REMOTE_READ_CONTEXT;
			PSMX_CTXT_USER(&req->fi_context) = mr;
			psmx_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX_AM_REP_WRITE:
		/* Reply to a write this side initiated. */
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX_AM_REQ_WRITE);
		op_error = (int)args[0].u32w1;
		/* Keep the first error seen for this request. */
		if (!req->error)
			req->error = op_error;
		if (eom) {
			if (req->ep->send_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->send_cq,
						req->write.context,
						req->write.buf,
						0, /* flags */
						req->write.len,
						0, /* data */
						0, /* tag */
						0, /* olen */
						req->error);
				if (event)
					psmx_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -ENOMEM;
			}

			if (req->ep->write_cntr)
				psmx_cntr_inc(req->ep->write_cntr);

			free(req);
		}
		break;

	case PSMX_AM_REP_READ:
		/* Reply to a read this side initiated; payload is one chunk. */
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX_AM_REQ_READ);
		op_error = (int)args[0].u32w1;
		offset = args[2].u64;
		/* Keep the first error seen for this request. */
		if (!req->error)
			req->error = op_error;
		if (!op_error) {
			memcpy((char *)req->read.buf + offset, src, len);
			req->read.len_read += len;
		}
		if (eom) {
			if (req->ep->send_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->send_cq,
						req->read.context,
						req->read.buf,
						0, /* flags */
						req->read.len_read,
						0, /* data */
						0, /* tag */
						req->read.len - req->read.len_read,
						req->error);
				if (event)
					psmx_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -ENOMEM;
			}

			if (req->ep->read_cntr)
				psmx_cntr_inc(req->ep->read_cntr);

			free(req);
		}
		break;

	default:
		err = -EINVAL;
		break;
	}

	return err;
}
/*
 * Start an RMA write of len bytes from buf to (addr, key) on dest_addr.
 *
 * Paths, in the order they are tried:
 *  - FI_TRIGGER: the operation is recorded in a psmx_trigger attached to
 *    the counter named by the fi_triggered_context and nothing is sent.
 *  - Destination is this domain's own PSM endpoint: delegated to
 *    psmx_rma_self().
 *  - psmx_env.tagged_rma set and len larger than one chunk: a
 *    PSMX_AM_REQ_WRITE_LONG active message announces the write and the
 *    data itself is sent with psm_mq_isend() under an RMA tag.
 *  - Otherwise the data is sent as a sequence of PSMX_AM_REQ_WRITE active
 *    messages of at most chunk_size bytes, the last one flagged
 *    PSMX_AM_EOM.
 *
 * With FI_INJECT the data is copied into storage allocated together with
 * the request, so the caller's buffer is not referenced afterwards.
 *
 * When the endpoint uses an FI_AV_TABLE address vector, dest_addr is an
 * index into it; otherwise it is used as the PSM endpoint address.
 *
 * The return values of the PSM send calls are not examined.
 *
 * Returns 0, -ENOMEM on allocation failure, -EINVAL for a NULL buf or an
 * invalid destination, or whatever psmx_rma_self() returns.
 */
ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
		      void *desc, fi_addr_t dest_addr,
		      uint64_t addr, uint64_t key, void *context,
		      uint64_t flags, uint64_t data)
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	struct psmx_epaddr_context *peer_ctxt;
	struct psmx_am_request *req;
	psm_epaddr_t peer;
	psm_amarg_t args[8];
	psm_mq_req_t psm_req;
	uint64_t rma_tag;
	size_t idx;
	int am_flags = PSM_AM_FLAG_ASYNC;
	int chunk_size;
	int nargs;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctxt = context;
		struct psmx_trigger *trigger = calloc(1, sizeof(*trigger));

		if (!trigger)
			return -ENOMEM;

		trigger->op = PSMX_TRIGGERED_WRITE;
		trigger->cntr = container_of(trig_ctxt->threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = trig_ctxt->threshold.threshold;

		/* Remember every argument so the write can be replayed. */
		trigger->write.ep = ep;
		trigger->write.buf = buf;
		trigger->write.len = len;
		trigger->write.desc = desc;
		trigger->write.dest_addr = dest_addr;
		trigger->write.addr = addr;
		trigger->write.key = key;
		trigger->write.context = context;
		trigger->write.flags = flags & ~FI_TRIGGER;
		trigger->write.data = data;

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);
	assert(ep_priv->domain);

	if (!buf)
		return -EINVAL;

	/* Turn dest_addr into a PSM endpoint address. */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = dest_addr;
		if (idx >= av->last)
			return -EINVAL;

		dest_addr = (fi_addr_t) av->psm_epaddrs[idx];
	} else if (!dest_addr) {
		return -EINVAL;
	}
	peer = (psm_epaddr_t) dest_addr;

	peer_ctxt = psm_epaddr_getctxt((void *)dest_addr);
	if (peer_ctxt->epid == ep_priv->domain->psm_epid)
		return psmx_rma_self(PSMX_AM_REQ_WRITE, ep_priv,
				     (void *)buf, len, desc, addr, key,
				     context, flags, data);

	if (flags & FI_INJECT) {
		void *staged;

		/* Request header and a private copy of the data in one block. */
		req = malloc(sizeof(*req) + len);
		if (!req)
			return -ENOMEM;

		memset((void *)req, 0, sizeof(*req));
		staged = (void *)(req + 1);
		memcpy(staged, (void *)buf, len);
		buf = staged;

		PSMX_CTXT_TYPE(&req->fi_context) = PSMX_INJECT_WRITE_CONTEXT;
		req->no_event = 1;
	} else {
		req = calloc(1, sizeof(*req));
		if (!req)
			return -ENOMEM;

		if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) {
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_NOCOMP_WRITE_CONTEXT;
			req->no_event = 1;
		} else {
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_WRITE_CONTEXT;
		}
	}

	req->op = PSMX_AM_REQ_WRITE;
	req->ep = ep_priv;
	req->write.buf = (void *)buf;
	req->write.len = len;
	req->write.addr = addr;	/* needed? */
	req->write.key = key;	/* needed? */
	req->write.context = context;
	PSMX_CTXT_USER(&req->fi_context) = context;

	chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_request_short);

	if (psmx_env.tagged_rma && len > chunk_size) {
		/* Large transfer: announce via AM, move the data via the MQ. */
		rma_tag = PSMX_RMA_BIT | ep_priv->domain->psm_epid;

		args[0].u32w0 = PSMX_AM_REQ_WRITE_LONG;
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		args[4].u64 = rma_tag;
		nargs = 5;
		if (flags & FI_REMOTE_CQ_DATA) {
			args[5].u64 = data;
			args[0].u32w0 |= PSMX_AM_DATA;
			nargs++;
		}

		psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, nargs,
				     NULL, 0,
				     am_flags | PSM_AM_FLAG_NOREPLY,
				     NULL, NULL);

		psm_mq_isend(ep_priv->domain->psm_mq, peer, 0, rma_tag,
			     buf, len, (void *)&req->fi_context, &psm_req);

		return 0;
	}

	/* Full-size chunks first; none of them asks for a reply. */
	nargs = 4;
	for (; len > chunk_size; len -= chunk_size) {
		args[0].u32w0 = PSMX_AM_REQ_WRITE;
		args[0].u32w1 = chunk_size;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;

		psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, nargs,
				     (void *)buf, chunk_size,
				     am_flags | PSM_AM_FLAG_NOREPLY,
				     NULL, NULL);

		buf = (const char *)buf + chunk_size;
		addr += chunk_size;
	}

	/* Final chunk: carries EOM and, if requested, the immediate data. */
	args[0].u32w0 = PSMX_AM_REQ_WRITE | PSMX_AM_EOM;
	args[0].u32w1 = len;
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[2].u64 = addr;
	args[3].u64 = key;
	if (flags & FI_REMOTE_CQ_DATA) {
		args[4].u64 = data;
		args[0].u32w0 |= PSMX_AM_DATA;
		nargs++;
	}

	psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, nargs,
			     (void *)buf, len, am_flags, NULL, NULL);

	return 0;
}
/*
 * Start an RMA read of len bytes from (addr, key) on src_addr into buf.
 *
 * Paths, in the order they are tried:
 *  - FI_TRIGGER: the operation is recorded in a psmx_trigger attached to
 *    the counter named by the fi_triggered_context and nothing is sent.
 *  - Source is this domain's own PSM endpoint: delegated to
 *    psmx_rma_self().
 *  - psmx_env.tagged_rma set and len larger than one chunk: a receive for
 *    the whole buffer is posted under an RMA tag, then a
 *    PSMX_AM_REQ_READ_LONG active message asks the peer for the data.
 *  - Otherwise one PSMX_AM_REQ_READ active message is issued per chunk of
 *    at most chunk_size bytes, each carrying the offset of that chunk in
 *    buf; the last one is flagged PSMX_AM_EOM.
 *
 * When the endpoint uses an FI_AV_TABLE address vector, src_addr is an
 * index into it; otherwise it is used as the PSM endpoint address.
 *
 * The return values of the PSM calls are not examined.
 *
 * Returns 0, -ENOMEM on allocation failure, -EINVAL for a NULL buf or an
 * invalid source, or whatever psmx_rma_self() returns.
 */
ssize_t _psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
		       void *desc, fi_addr_t src_addr,
		       uint64_t addr, uint64_t key, void *context,
		       uint64_t flags)
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	struct psmx_epaddr_context *peer_ctxt;
	struct psmx_am_request *req;
	psm_epaddr_t peer;
	psm_amarg_t args[8];
	psm_mq_req_t psm_req;
	uint64_t rma_tag;
	size_t buf_offset = 0;
	size_t idx;
	int chunk_size;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctxt = context;
		struct psmx_trigger *trigger = calloc(1, sizeof(*trigger));

		if (!trigger)
			return -ENOMEM;

		trigger->op = PSMX_TRIGGERED_READ;
		trigger->cntr = container_of(trig_ctxt->threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = trig_ctxt->threshold.threshold;

		/* Remember every argument so the read can be replayed. */
		trigger->read.ep = ep;
		trigger->read.buf = buf;
		trigger->read.len = len;
		trigger->read.desc = desc;
		trigger->read.src_addr = src_addr;
		trigger->read.addr = addr;
		trigger->read.key = key;
		trigger->read.context = context;
		trigger->read.flags = flags & ~FI_TRIGGER;

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);
	assert(ep_priv->domain);

	if (!buf)
		return -EINVAL;

	/* Turn src_addr into a PSM endpoint address. */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = src_addr;
		if (idx >= av->last)
			return -EINVAL;

		src_addr = (fi_addr_t) av->psm_epaddrs[idx];
	} else if (!src_addr) {
		return -EINVAL;
	}
	peer = (psm_epaddr_t) src_addr;

	peer_ctxt = psm_epaddr_getctxt((void *)src_addr);
	if (peer_ctxt->epid == ep_priv->domain->psm_epid)
		return psmx_rma_self(PSMX_AM_REQ_READ, ep_priv,
				     buf, len, desc, addr, key,
				     context, flags, 0);

	req = calloc(1, sizeof(*req));
	if (!req)
		return -ENOMEM;

	req->op = PSMX_AM_REQ_READ;
	req->ep = ep_priv;
	req->read.buf = buf;
	req->read.len = len;
	req->read.addr = addr;	/* needed? */
	req->read.key = key;	/* needed? */
	req->read.context = context;
	PSMX_CTXT_USER(&req->fi_context) = context;

	if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) {
		PSMX_CTXT_TYPE(&req->fi_context) = PSMX_NOCOMP_READ_CONTEXT;
		req->no_event = 1;
	} else {
		PSMX_CTXT_TYPE(&req->fi_context) = PSMX_READ_CONTEXT;
	}

	chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_reply_short);

	if (psmx_env.tagged_rma && len > chunk_size) {
		/* Large transfer: post the receive first, then ask for the data. */
		rma_tag = PSMX_RMA_BIT | ep_priv->domain->psm_epid;

		psm_mq_irecv(ep_priv->domain->psm_mq, rma_tag, -1ULL, 0,
			     buf, len, (void *)&req->fi_context, &psm_req);

		args[0].u32w0 = PSMX_AM_REQ_READ_LONG;
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		args[4].u64 = rma_tag;

		psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, 5,
				     NULL, 0, PSM_AM_FLAG_NOREPLY,
				     NULL, NULL);

		return 0;
	}

	/* Fields that are the same for every chunk request. */
	args[0].u32w0 = PSMX_AM_REQ_READ;
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[3].u64 = key;

	for (; len > chunk_size; len -= chunk_size) {
		args[0].u32w1 = chunk_size;
		args[2].u64 = addr;
		args[4].u64 = buf_offset;

		psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, 5,
				     NULL, 0, 0, NULL, NULL);

		addr += chunk_size;
		buf_offset += chunk_size;
	}

	/* Final (possibly only) chunk carries the EOM flag. */
	args[0].u32w0 = PSMX_AM_REQ_READ | PSMX_AM_EOM;
	args[0].u32w1 = len;
	args[2].u64 = addr;
	args[4].u64 = buf_offset;

	psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, 5,
			     NULL, 0, 0, NULL, NULL);

	return 0;
}
/*
 * Post a tagged send of len bytes from buf to dest_addr.
 *
 * Paths, in the order they are tried:
 *  - FI_TRIGGER: the operation is recorded in a psmx_trigger attached to
 *    the counter named by the fi_triggered_context and nothing is sent.
 *  - FI_INJECT: messages longer than PSMX_INJECT_SIZE are rejected with
 *    -FI_EMSGSIZE; otherwise the message is sent with psm_mq_send() /
 *    psm_mq_send2() and the send counter and completion event are
 *    produced right here.
 *  - Otherwise the send is posted with psm_mq_isend() / psm_mq_isend2().
 *
 * When the endpoint uses an FI_AV_TABLE address vector, dest_addr is an
 * index into it; otherwise it is used as the PSM endpoint address.  The
 * domain's reserved tag bits are cleared from tag (with a warning if any
 * were set).
 *
 * Returns 0, -FI_ENOMEM, -FI_EINVAL, -FI_EMSGSIZE, or a PSM error
 * translated by psmx_errno().
 *
 * NOTE(review): the `#endif` between the parameter list and the opening
 * brace closes a preprocessor conditional whose `#if` is not part of this
 * excerpt.  The PSM_VERNO_MAJOR >= 2 code below references `data`, which
 * is not a parameter of the signature shown here; presumably the hidden
 * branch declares an alternative signature that supplies it -- confirm
 * against the full file.
 */
ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
			  void *desc, fi_addr_t dest_addr, uint64_t tag,
			  void *context, uint64_t flags)
#endif
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	psm_epaddr_t psm_epaddr;
	psm_mq_req_t psm_req;
	uint64_t psm_tag;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t psm_tag2;
#endif
	struct fi_context *fi_context;
	int err;
	size_t idx;
	int no_completion = 0;
	struct psmx_cq_event *event;

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);

	/* Triggered operation: stash the arguments on the counter and return. */
	if (flags & FI_TRIGGER) {
		struct psmx_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX_TRIGGERED_TSEND;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->tsend.ep = ep;
		trigger->tsend.buf = buf;
		trigger->tsend.len = len;
		trigger->tsend.desc = desc;
		trigger->tsend.dest_addr = dest_addr;
		trigger->tsend.tag = tag;
		trigger->tsend.context = context;
		/* FI_TRIGGER is stripped from the saved flags. */
		trigger->tsend.flags = flags & ~FI_TRIGGER;
#if (PSM_VERNO_MAJOR >= 2)
		trigger->tsend.data = data;
#endif

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	/* Only warn here; the reserved bits are masked off further down. */
	if (tag & ep_priv->domain->reserved_tag_bits) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
			"using reserved tag bits."
			"tag=%lx. reserved_bits=%lx.\n", tag,
			ep_priv->domain->reserved_tag_bits);
	}

	/* Resolve dest_addr: table index for FI_AV_TABLE, else used directly. */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = (size_t)dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm_epaddr = av->psm_epaddrs[idx];
	} else {
		psm_epaddr = (psm_epaddr_t) dest_addr;
	}

	psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);

#if (PSM_VERNO_MAJOR >= 2)
	PSMX_SET_TAG(psm_tag2, psm_tag, data);
#endif

	/*
	 * No completion is generated if the caller asked for none, or if the
	 * endpoint uses selective completion and FI_COMPLETION was not given.
	 */
	if ((flags & PSMX_NO_COMPLETION) ||
	    (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)))
		no_completion = 1;

	if (flags & FI_INJECT) {
		if (len > PSMX_INJECT_SIZE)
			return -FI_EMSGSIZE;

#if (PSM_VERNO_MAJOR >= 2)
		err = psm_mq_send2(ep_priv->domain->psm_mq, psm_epaddr, 0,
				   &psm_tag2, buf, len);
#else
		err = psm_mq_send(ep_priv->domain->psm_mq, psm_epaddr, 0,
				  psm_tag, buf, len);
#endif

		if (err != PSM_OK)
			return psmx_errno(err);

		/* The counter and CQ event are produced directly in this path. */
		if (ep_priv->send_cntr)
			psmx_cntr_inc(ep_priv->send_cntr);

		if (ep_priv->send_cq && !no_completion) {
			event = psmx_cq_create_event(
					ep_priv->send_cq,
					context, (void *)buf, flags, len,
#if (PSM_VERNO_MAJOR >= 2)
					(uint64_t) data, psm_tag,
#else
					0 /* data */, psm_tag,
#endif
					0 /* olen */,
					0 /* err */);

			if (event)
				psmx_cq_enqueue_event(ep_priv->send_cq, event);
			else
				return -FI_ENOMEM;
		}

		return 0;
	}

	/*
	 * Without a caller context and without a completion to report, the
	 * endpoint's own no-completion context is handed to PSM instead.
	 */
	if (no_completion && !context) {
		fi_context = &ep_priv->nocomp_send_context;
	} else {
		if (!context)
			return -FI_EINVAL;

		fi_context = context;
		PSMX_CTXT_TYPE(fi_context) = PSMX_TSEND_CONTEXT;
		PSMX_CTXT_USER(fi_context) = (void *)buf;
		PSMX_CTXT_EP(fi_context) = ep_priv;
	}

#if (PSM_VERNO_MAJOR >= 2)
	err = psm_mq_isend2(ep_priv->domain->psm_mq, psm_epaddr, 0,
			    &psm_tag2, buf, len, (void*)fi_context, &psm_req);
#else
	err = psm_mq_isend(ep_priv->domain->psm_mq, psm_epaddr, 0,
			   psm_tag, buf, len, (void*)fi_context, &psm_req);
#endif

	if (err != PSM_OK)
		return psmx_errno(err);

	/* The request handle is stored only in a caller-supplied context. */
	if (fi_context == context)
		PSMX_CTXT_REQ(fi_context) = psm_req;

	return 0;
}
/*
 * Start an RMA write of len bytes from buf to (addr, key) on dest_addr.
 *
 * Paths, in the order they are tried:
 *  - FI_TRIGGER: the operation is recorded in a psmx_trigger attached to
 *    the counter named by the fi_triggered_context and nothing is sent.
 *  - Destination is this domain's own PSM endpoint: delegated to
 *    psmx_rma_self().
 *  - psmx_env.tagged_rma set and len larger than one chunk: a
 *    PSMX_AM_REQ_WRITE_LONG active message announces the write (with any
 *    immediate data attached as its payload) and the data itself is sent
 *    with psm_mq_isend() under an RMA tag.
 *  - Otherwise the data is sent as a sequence of PSMX_AM_REQ_WRITE active
 *    messages of at most chunk_size bytes, the last one flagged
 *    PSMX_AM_EOM.
 *
 * With FI_INJECT the message may not exceed PSMX_INJECT_SIZE and the data
 * is copied into storage allocated together with the request.
 *
 * When the endpoint uses an FI_AV_TABLE address vector, dest_addr is an
 * index into it; otherwise it is used as the PSM endpoint address.
 *
 * The return values of the PSM send calls are not examined.
 *
 * Returns 0, -FI_ENOMEM, -FI_EINVAL, -FI_EMSGSIZE, or whatever
 * psmx_rma_self() returns.
 */
ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len,
		    void *desc, fi_addr_t dest_addr,
		    uint64_t addr, uint64_t key, void *context,
		    uint64_t flags, uint64_t data)
{
	struct psmx_fid_ep *ep_priv = container_of(ep, struct psmx_fid_ep, ep);
	struct psmx_fid_av *av;
	struct psmx_epaddr_context *peer_ctxt;
	struct psmx_am_request *req;
	psm_epaddr_t peer;
	psm_amarg_t args[8];
	psm_mq_req_t psm_req;
	uint64_t rma_tag;
	size_t idx;
	void *send_ctxt;
	int am_flags = PSM_AM_FLAG_ASYNC;
	int chunk_size;
	int nargs;
	int no_event;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctxt = context;
		struct psmx_trigger *trigger = calloc(1, sizeof(*trigger));

		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX_TRIGGERED_WRITE;
		trigger->cntr = container_of(trig_ctxt->trigger.threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = trig_ctxt->trigger.threshold.threshold;

		/* Remember every argument so the write can be replayed. */
		trigger->write.ep = ep;
		trigger->write.buf = buf;
		trigger->write.len = len;
		trigger->write.desc = desc;
		trigger->write.dest_addr = dest_addr;
		trigger->write.addr = addr;
		trigger->write.key = key;
		trigger->write.context = context;
		trigger->write.flags = flags & ~FI_TRIGGER;
		trigger->write.data = data;

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (!buf)
		return -FI_EINVAL;

	/* Turn dest_addr into a PSM endpoint address. */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		dest_addr = (fi_addr_t) av->psm_epaddrs[idx];
	} else if (!dest_addr) {
		return -FI_EINVAL;
	}
	peer = (psm_epaddr_t) dest_addr;

	peer_ctxt = psm_epaddr_getctxt((void *)dest_addr);
	if (peer_ctxt->epid == ep_priv->domain->psm_epid)
		return psmx_rma_self(PSMX_AM_REQ_WRITE, ep_priv,
				     (void *)buf, len, desc, addr, key,
				     context, flags, data);

	no_event = (flags & PSMX_NO_COMPLETION) ||
		   (ep_priv->send_selective_completion &&
		    !(flags & FI_COMPLETION));

	if (flags & FI_INJECT) {
		void *staged;

		if (len > PSMX_INJECT_SIZE)
			return -FI_EMSGSIZE;

		/* Request header and a private copy of the data in one block. */
		req = malloc(sizeof(*req) + len);
		if (!req)
			return -FI_ENOMEM;

		memset((void *)req, 0, sizeof(*req));
		staged = (void *)(req + 1);
		memcpy(staged, (void *)buf, len);
		buf = staged;
	} else {
		req = calloc(1, sizeof(*req));
		if (!req)
			return -FI_ENOMEM;

		if (no_event)
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_NOCOMP_WRITE_CONTEXT;
		else
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_WRITE_CONTEXT;
	}

	req->no_event = no_event;
	req->op = PSMX_AM_REQ_WRITE;
	req->ep = ep_priv;
	req->cq_flags = FI_WRITE | FI_RMA;
	req->write.buf = (void *)buf;
	req->write.len = len;
	req->write.addr = addr;	/* needed? */
	req->write.key = key;	/* needed? */
	req->write.context = context;
	PSMX_CTXT_USER(&req->fi_context) = context;
	PSMX_CTXT_EP(&req->fi_context) = ep_priv;

	chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_request_short);

	if (psmx_env.tagged_rma && len > chunk_size) {
		void *payload = NULL;
		int payload_len = 0;

		rma_tag = PSMX_RMA_BIT | ep_priv->domain->psm_epid;

		args[0].u32w0 = PSMX_AM_REQ_WRITE_LONG;
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		args[4].u64 = rma_tag;
		nargs = 5;

		if (flags & FI_REMOTE_CQ_DATA) {
			args[0].u32w0 |= PSMX_AM_DATA;
			payload = &data;
			payload_len = sizeof(data);
			am_flags = 0;
		}

		if (flags & FI_DELIVERY_COMPLETE) {
			args[0].u32w0 |= PSMX_AM_FORCE_ACK;
			send_ctxt = NULL;
		} else {
			send_ctxt = (void *)&req->fi_context;
		}

		/*
		 * NOTE: the immediate data travels as the AM payload rather
		 * than as args[5].  If nargs were greater than 5, the
		 * psm_mq_isend below would hang when the destination is on
		 * the same node (i.e. going through the shared memory path).
		 */
		psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, nargs,
				     payload, payload_len, am_flags,
				     NULL, NULL);

		psm_mq_isend(ep_priv->domain->psm_mq, peer, 0, rma_tag,
			     buf, len, send_ctxt, &psm_req);

		return 0;
	}

	/* Full-size chunks first. */
	nargs = 4;
	for (; len > chunk_size; len -= chunk_size) {
		args[0].u32w0 = PSMX_AM_REQ_WRITE;
		args[0].u32w1 = chunk_size;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;

		psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, nargs,
				     (void *)buf, chunk_size, am_flags,
				     NULL, NULL);

		buf = (const char *)buf + chunk_size;
		addr += chunk_size;
	}

	/* Final chunk: carries EOM and, if requested, the immediate data. */
	args[0].u32w0 = PSMX_AM_REQ_WRITE | PSMX_AM_EOM;
	args[0].u32w1 = len;
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[2].u64 = addr;
	args[3].u64 = key;
	if (flags & FI_REMOTE_CQ_DATA) {
		args[4].u64 = data;
		args[0].u32w0 |= PSMX_AM_DATA;
		nargs++;
	}

	psm_am_request_short(peer, PSMX_AM_RMA_HANDLER, args, nargs,
			     (void *)buf, len, am_flags, NULL, NULL);

	return 0;
}
/*
 * Post a tagged receive into buf.
 *
 * Paths, in the order they are tried:
 *  - FI_PEEK: delegated to _psmx_tagged_peek().
 *  - FI_TRIGGER: the operation is recorded in a psmx_trigger attached to
 *    the counter named by the fi_triggered_context and nothing is posted.
 *  - Otherwise the receive is posted with psm_mq_irecv().
 *
 * The match tag is tag with the domain's reserved bits cleared (a warning
 * is logged if any were set); the tag selector is the complement of ignore
 * with the reserved bits forced on.
 *
 * A NULL context is accepted only when the endpoint uses selective receive
 * completion and FI_COMPLETION is not requested; the endpoint's own
 * no-completion context is then handed to PSM.  In every other case a NULL
 * context yields -FI_EINVAL.
 *
 * Returns 0, -FI_ENOMEM, -FI_EINVAL, a PSM error translated by
 * psmx_errno(), or whatever _psmx_tagged_peek() returns.
 */
ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len,
			  void *desc, fi_addr_t src_addr,
			  uint64_t tag, uint64_t ignore,
			  void *context, uint64_t flags)
{
	struct psmx_fid_ep *self = container_of(ep, struct psmx_fid_ep, ep);
	struct fi_context *op_ctx;
	psm_mq_req_t request;
	uint64_t match_tag, match_sel;
	int rc;

	if (flags & FI_PEEK)
		return _psmx_tagged_peek(ep, buf, len, desc, src_addr,
					 tag, ignore, context, flags);

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctxt = context;
		struct psmx_trigger *trigger = calloc(1, sizeof(*trigger));

		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX_TRIGGERED_TRECV;
		trigger->cntr = container_of(trig_ctxt->trigger.threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = trig_ctxt->trigger.threshold.threshold;

		/* Remember every argument so the receive can be replayed. */
		trigger->trecv.ep = ep;
		trigger->trecv.buf = buf;
		trigger->trecv.len = len;
		trigger->trecv.desc = desc;
		trigger->trecv.src_addr = src_addr;
		trigger->trecv.tag = tag;
		trigger->trecv.ignore = ignore;
		trigger->trecv.context = context;
		trigger->trecv.flags = flags & ~FI_TRIGGER;

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (tag & self->domain->reserved_tag_bits) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
			"using reserved tag bits."
			"tag=%lx. reserved_bits=%lx.\n", tag,
			self->domain->reserved_tag_bits);
	}

	match_tag = tag & (~self->domain->reserved_tag_bits);
	match_sel = (~ignore) | self->domain->reserved_tag_bits;

	if (context) {
		/* Caller-supplied context: stamp it for this operation. */
		op_ctx = context;
		PSMX_CTXT_TYPE(op_ctx) = PSMX_TRECV_CONTEXT;
		PSMX_CTXT_USER(op_ctx) = buf;
		PSMX_CTXT_EP(op_ctx) = self;
	} else if (self->recv_selective_completion &&
		   !(flags & FI_COMPLETION)) {
		/* No context needed when no completion will be reported. */
		op_ctx = &self->nocomp_recv_context;
	} else {
		return -FI_EINVAL;
	}

	rc = psm_mq_irecv(self->domain->psm_mq,
			  match_tag, match_sel,
			  0, /* flags */
			  buf, len,
			  (void *)op_ctx, &request);
	if (rc != PSM_OK)
		return psmx_errno(rc);

	/* The request handle is stored only in a caller-supplied context. */
	if (op_ctx == context)
		PSMX_CTXT_REQ(op_ctx) = request;

	return 0;
}
int psmx_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { struct psmx_fid_domain *domain_priv; struct psmx_fid_ep *ep_priv; int err; uint64_t ep_cap; if (info) ep_cap = info->caps; else ep_cap = FI_TAGGED; domain_priv = container_of(domain, struct psmx_fid_domain, util_domain.domain_fid.fid); if (!domain_priv) return -FI_EINVAL; if (info && info->ep_attr && info->ep_attr->auth_key) { if (info->ep_attr->auth_keylen != sizeof(psm_uuid_t)) { FI_WARN(&psmx_prov, FI_LOG_EP_CTRL, "Invalid auth_key_len %d, should be %d.\n", info->ep_attr->auth_keylen, sizeof(psm_uuid_t)); return -FI_EINVAL; } if (memcmp(domain_priv->fabric->uuid, info->ep_attr->auth_key, sizeof(psm_uuid_t))) { FI_WARN(&psmx_prov, FI_LOG_EP_CTRL, "Invalid auth_key: %s\n", psmx_uuid_to_string((void *)info->ep_attr->auth_key)); return -FI_EINVAL; } } err = psmx_domain_check_features(domain_priv, ep_cap); if (err) return err; ep_priv = (struct psmx_fid_ep *) calloc(1, sizeof *ep_priv); if (!ep_priv) return -FI_ENOMEM; ep_priv->ep.fid.fclass = FI_CLASS_EP; ep_priv->ep.fid.context = context; ep_priv->ep.fid.ops = &psmx_fi_ops; ep_priv->ep.ops = &psmx_ep_ops; ep_priv->ep.cm = &psmx_cm_ops; ep_priv->domain = domain_priv; atomic_initialize(&ep_priv->ref, 0); PSMX_CTXT_TYPE(&ep_priv->nocomp_send_context) = PSMX_NOCOMP_SEND_CONTEXT; PSMX_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv; PSMX_CTXT_TYPE(&ep_priv->nocomp_recv_context) = PSMX_NOCOMP_RECV_CONTEXT; PSMX_CTXT_EP(&ep_priv->nocomp_recv_context) = ep_priv; if (ep_cap & FI_TAGGED) ep_priv->ep.tagged = &psmx_tagged_ops; if (ep_cap & FI_MSG) ep_priv->ep.msg = &psmx_msg_ops; if ((ep_cap & FI_MSG) && psmx_env.am_msg) ep_priv->ep.msg = &psmx_msg2_ops; if (ep_cap & FI_RMA) ep_priv->ep.rma = &psmx_rma_ops; if (ep_cap & FI_ATOMICS) ep_priv->ep.atomic = &psmx_atomic_ops; ep_priv->caps = ep_cap; err = psmx_domain_enable_ep(domain_priv, ep_priv); if (err) { free(ep_priv); return err; } psmx_domain_acquire(domain_priv); if 
(info) { if (info->tx_attr) ep_priv->tx_flags = info->tx_attr->op_flags; if (info->rx_attr) ep_priv->rx_flags = info->rx_attr->op_flags; } psmx_ep_optimize_ops(ep_priv); ep_priv->service = PSMX_ANY_SERVICE; if (info && info->src_addr) ep_priv->service = ((struct psmx_src_name *)info->src_addr)->service; if (ep_priv->service == PSMX_ANY_SERVICE) ep_priv->service = ((getpid() & 0x7FFF) << 16) + ((uintptr_t)ep_priv & 0xFFFF); psmx_ns_add_local_name(ep_priv->service, domain_priv->psm_epid); *ep = &ep_priv->ep; return 0; }
/*
 * Build a queue event from a completed PSM request.
 *
 * eq         - event queue the event is destined for; its format selects
 *              which entry layout is filled in.
 * psm_status - completion status of the request; its context field is the
 *              fi_context attached when the operation was posted.
 *
 * Returns a newly allocated event, or NULL when allocation fails or the
 * queue format is not supported. The caller owns the returned event.
 */
static struct psmx_event *psmx_eq_create_event_from_status(
				struct psmx_fid_eq *eq,
				psm_mq_status_t *psm_status)
{
	struct psmx_event *event;
	struct psmx_multi_recv *req;
	struct fi_context *fi_context = psm_status->context;
	void *op_context, *buf;
	int is_recv = 0;

	event = calloc(1, sizeof(*event));
	if (!event) {
		fprintf(stderr, "%s: out of memory\n", __func__);
		return NULL;
	}

	/* Work out which context and buffer to report for this operation. */
	switch (PSMX_CTXT_TYPE(fi_context)) {
	case PSMX_SEND_CONTEXT:
		op_context = fi_context;
		buf = PSMX_CTXT_USER(fi_context);
		break;
	case PSMX_RECV_CONTEXT:
		op_context = fi_context;
		buf = PSMX_CTXT_USER(fi_context);
		is_recv = 1;
		break;
	case PSMX_MULTI_RECV_CONTEXT:
		/* The user slot holds the multi-recv request, not the buffer. */
		op_context = fi_context;
		req = PSMX_CTXT_USER(fi_context);
		buf = req->buf + req->offset;
		is_recv = 1;
		break;
	default:
		op_context = PSMX_CTXT_USER(fi_context);
		buf = NULL;
		break;
	}

	if ((event->error = !!psm_status->error_code)) {
		event->eqe.err.op_context = op_context;
		event->eqe.err.err = -psmx_errno(psm_status->error_code);
		event->eqe.err.prov_errno = psm_status->error_code;
		event->eqe.err.olen = psm_status->msg_length - psm_status->nbytes;
		/* FIXME: eqe.err.prov_data is not filled in. */
		goto out;
	}

	switch (eq->format) {
	case FI_EQ_FORMAT_CONTEXT:
		event->eqe.context.op_context = op_context;
		break;

	case FI_EQ_FORMAT_COMP:
		event->eqe.comp.op_context = op_context;
		/* FIXME: eqe.comp.flags is not filled in. */
		event->eqe.comp.len = psm_status->nbytes;
		break;

	case FI_EQ_FORMAT_DATA:
		event->eqe.data.op_context = op_context;
		event->eqe.data.buf = buf;
		/* FIXME: eqe.data.flags and eqe.data.data are not filled in. */
		event->eqe.data.len = psm_status->nbytes;
		break;

	case FI_EQ_FORMAT_TAGGED:
		event->eqe.tagged.op_context = op_context;
		event->eqe.tagged.buf = buf;
		/* FIXME: eqe.tagged.flags and eqe.tagged.data are not filled in. */
		event->eqe.tagged.len = psm_status->nbytes;
		event->eqe.tagged.tag = psm_status->msg_tag;
		break;

	case FI_EQ_FORMAT_CM:
	default:
		fprintf(stderr, "%s: unsupported EQ format %d\n", __func__,
			eq->format);
		/* Release the event allocated above; it was leaked before. */
		free(event);
		return NULL;
	}

out:
	if (is_recv)
		event->source = psm_status->msg_tag;

	return event;
}
/*
 * Post a tagged receive on an endpoint.
 *
 * FI_PEEK requests are handed to _psmx_tagged_peek(). FI_TRIGGER requests
 * are recorded as a trigger on the counter named in the triggered context
 * and posted later. With PSM 2 or newer, FI_CLAIM receives the message
 * whose request handle is stored in the supplied context. Everything else
 * is posted to the PSM matched queue directly.
 *
 * Returns 0 on success, -FI_EINVAL for a missing context or an address
 * index outside the AV table, -FI_ENOMEM when a trigger cannot be
 * allocated, or the translated PSM error.
 */
ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
			  fi_addr_t src_addr, uint64_t tag, uint64_t ignore,
			  void *context, uint64_t flags)
{
	struct psmx_fid_ep *rx_ep;
	struct fi_context *rx_ctxt;
	psm_mq_req_t mq_req;
	uint64_t match_tag, match_sel;
	int rc;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t match_tag2, match_sel2;
	struct psmx_fid_av *addr_vec;
	size_t av_index;
#endif

	rx_ep = container_of(ep, struct psmx_fid_ep, ep);

	if (flags & FI_PEEK)
		return _psmx_tagged_peek(ep, buf, len, desc, src_addr,
					 tag, ignore, context, flags);

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctxt = context;
		struct psmx_trigger *trig;

		trig = calloc(1, sizeof(*trig));
		if (!trig)
			return -FI_ENOMEM;

		/* Capture every argument so the receive can be replayed. */
		trig->op = PSMX_TRIGGERED_TRECV;
		trig->cntr = container_of(trig_ctxt->trigger.threshold.cntr,
					  struct psmx_fid_cntr, cntr);
		trig->threshold = trig_ctxt->trigger.threshold.threshold;
		trig->trecv.ep = ep;
		trig->trecv.buf = buf;
		trig->trecv.len = len;
		trig->trecv.desc = desc;
		trig->trecv.src_addr = src_addr;
		trig->trecv.tag = tag;
		trig->trecv.ignore = ignore;
		trig->trecv.context = context;
		trig->trecv.flags = flags & ~FI_TRIGGER;

		psmx_cntr_add_trigger(trig->cntr, trig);
		return 0;
	}

#if (PSM_VERNO_MAJOR >= 2)
	if (flags & FI_CLAIM) {
		if (!context)
			return -FI_EINVAL;

		/* TODO: handle FI_DISCARD */

		/* Fetch the stored request before the context is rewritten. */
		rx_ctxt = context;
		mq_req = PSMX_CTXT_REQ(rx_ctxt);
		PSMX_CTXT_TYPE(rx_ctxt) = PSMX_TRECV_CONTEXT;
		PSMX_CTXT_USER(rx_ctxt) = buf;
		PSMX_CTXT_EP(rx_ctxt) = rx_ep;

		rc = psm_mq_imrecv(rx_ep->domain->psm_mq, 0, /*flags*/
				   buf, len, context, &mq_req);
		if (rc != PSM_OK)
			return psmx_errno(rc);

		PSMX_CTXT_REQ(rx_ctxt) = mq_req;
		return 0;
	}
#endif

	if (tag & rx_ep->domain->reserved_tag_bits) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
			"using reserved tag bits."
			"tag=%lx. reserved_bits=%lx.\n", tag,
			rx_ep->domain->reserved_tag_bits);
	}

	/* Reserved bits are cleared from the tag and always matched. */
	match_tag = tag & (~rx_ep->domain->reserved_tag_bits);
	match_sel = (~ignore) | rx_ep->domain->reserved_tag_bits;

	if (context) {
		rx_ctxt = context;
		PSMX_CTXT_TYPE(rx_ctxt) = PSMX_TRECV_CONTEXT;
		PSMX_CTXT_USER(rx_ctxt) = buf;
		PSMX_CTXT_EP(rx_ctxt) = rx_ep;
	} else if (rx_ep->recv_selective_completion &&
		   !(flags & FI_COMPLETION)) {
		/* No user context: fall back to the endpoint's own context. */
		rx_ctxt = &rx_ep->nocomp_recv_context;
	} else {
		return -FI_EINVAL;
	}

#if (PSM_VERNO_MAJOR >= 2)
	if (!(rx_ep->caps & FI_DIRECTED_RECV) || src_addr == FI_ADDR_UNSPEC) {
		src_addr = 0;
	} else {
		addr_vec = rx_ep->av;
		if (addr_vec && addr_vec->type == FI_AV_TABLE) {
			/* For a table AV the address is an index into the table. */
			av_index = (size_t)src_addr;
			if (av_index >= addr_vec->last)
				return -FI_EINVAL;

			src_addr = (fi_addr_t)addr_vec->psm_epaddrs[av_index];
		}
	}

	PSMX_SET_TAG(match_tag2, match_tag, 0);
	PSMX_SET_TAG(match_sel2, match_sel, 0);

	rc = psm_mq_irecv2(rx_ep->domain->psm_mq,
			   (psm_epaddr_t)src_addr,
			   &match_tag2, &match_sel2, 0, /* flags */
			   buf, len, (void *)rx_ctxt, &mq_req);
#else
	rc = psm_mq_irecv(rx_ep->domain->psm_mq,
			  match_tag, match_sel, 0, /* flags */
			  buf, len, (void *)rx_ctxt, &mq_req);
#endif
	if (rc != PSM_OK)
		return psmx_errno(rc);

	/* Only a caller-owned context records the request handle. */
	if (rx_ctxt == context)
		PSMX_CTXT_REQ(rx_ctxt) = mq_req;

	return 0;
}
int psmx_eq_poll_mq(struct psmx_fid_eq *eq, struct psmx_fid_domain *domain_if_null_eq) { psm_mq_req_t psm_req; psm_mq_status_t psm_status; struct fi_context *fi_context; struct psmx_fid_domain *domain; struct psmx_fid_ep *tmp_ep; struct psmx_fid_eq *tmp_eq; struct psmx_fid_cntr *tmp_cntr; struct psmx_event *event; int multi_recv; int err; if (eq) domain = eq->domain; else domain = domain_if_null_eq; while (1) { err = psm_mq_ipeek(domain->psm_mq, &psm_req, NULL); if (err == PSM_OK) { err = psm_mq_test(&psm_req, &psm_status); fi_context = psm_status.context; tmp_ep = PSMX_CTXT_EP(fi_context); tmp_eq = NULL; tmp_cntr = NULL; multi_recv = 0; switch (PSMX_CTXT_TYPE(fi_context)) { case PSMX_NOCOMP_SEND_CONTEXT: tmp_ep->pending_sends--; if (!tmp_ep->send_cntr_event_flag) tmp_cntr = tmp_ep->send_cntr; break; case PSMX_NOCOMP_RECV_CONTEXT: if (!tmp_ep->recv_cntr_event_flag) tmp_cntr = tmp_ep->recv_cntr; break; case PSMX_NOCOMP_WRITE_CONTEXT: tmp_ep->pending_writes--; if (!tmp_ep->write_cntr_event_flag) tmp_cntr = tmp_ep->write_cntr; break; case PSMX_NOCOMP_READ_CONTEXT: tmp_ep->pending_reads--; if (!tmp_ep->read_cntr_event_flag) tmp_cntr = tmp_ep->read_cntr; break; case PSMX_INJECT_CONTEXT: tmp_ep->pending_sends--; if (!tmp_ep->send_cntr_event_flag) tmp_cntr = tmp_ep->send_cntr; free(fi_context); break; case PSMX_INJECT_WRITE_CONTEXT: tmp_ep->pending_writes--; if (!tmp_ep->write_cntr_event_flag) tmp_cntr = tmp_ep->write_cntr; free(fi_context); break; case PSMX_SEND_CONTEXT: tmp_ep->pending_sends--; tmp_eq = tmp_ep->send_eq; tmp_cntr = tmp_ep->send_cntr; break; case PSMX_RECV_CONTEXT: tmp_eq = tmp_ep->recv_eq; tmp_cntr = tmp_ep->recv_cntr; break; case PSMX_MULTI_RECV_CONTEXT: multi_recv = 1; tmp_eq = tmp_ep->recv_eq; tmp_cntr = tmp_ep->recv_cntr; break; case PSMX_READ_CONTEXT: tmp_ep->pending_reads--; tmp_eq = tmp_ep->send_eq; tmp_cntr = tmp_ep->read_cntr; break; case PSMX_WRITE_CONTEXT: tmp_ep->pending_writes--; tmp_eq = tmp_ep->send_eq; tmp_cntr = tmp_ep->write_cntr; 
break; } if (tmp_eq) { event = psmx_eq_create_event_from_status(tmp_eq, &psm_status); if (!event) return -ENOMEM; psmx_eq_enqueue_event(tmp_eq, event); } if (tmp_cntr) tmp_cntr->cntr.ops->add(&tmp_cntr->cntr, 1); if (multi_recv) { struct psmx_multi_recv *req; psm_mq_req_t psm_req; req = PSMX_CTXT_USER(fi_context); req->offset += psm_status.nbytes; if (req->offset + req->min_buf_size <= req->len) { err = psm_mq_irecv(tmp_ep->domain->psm_mq, req->tag, req->tagsel, req->flag, req->buf + req->offset, req->len - req->offset, (void *)fi_context, &psm_req); if (err != PSM_OK) return psmx_errno(err); PSMX_CTXT_REQ(fi_context) = psm_req; } else { if (tmp_eq) { event = psmx_eq_create_event( tmp_eq, req->context, req->buf, FI_MULTI_RECV, req->len, req->len - req->offset, /* data */ 0, /* tag */ 0, /* olen */ 0); /* err */ if (!event) return -ENOMEM; psmx_eq_enqueue_event(tmp_eq, event); } free(req); } } if (!eq || tmp_eq == eq) return 1; } else if (err == PSM_MQ_NO_COMPLETIONS) { return 0; } else { return psmx_errno(err); } } }
int psmx_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { struct psmx_fid_domain *domain_priv; struct psmx_fid_ep *ep_priv; int err; uint64_t ep_cap; if (info) ep_cap = info->caps; else ep_cap = FI_TAGGED; domain_priv = container_of(domain, struct psmx_fid_domain, util_domain.domain_fid.fid); if (!domain_priv) return -FI_EINVAL; err = psmx_domain_check_features(domain_priv, ep_cap); if (err) return err; ep_priv = (struct psmx_fid_ep *) calloc(1, sizeof *ep_priv); if (!ep_priv) return -FI_ENOMEM; ep_priv->ep.fid.fclass = FI_CLASS_EP; ep_priv->ep.fid.context = context; ep_priv->ep.fid.ops = &psmx_fi_ops; ep_priv->ep.ops = &psmx_ep_ops; ep_priv->ep.cm = &psmx_cm_ops; ep_priv->domain = domain_priv; atomic_initialize(&ep_priv->ref, 0); PSMX_CTXT_TYPE(&ep_priv->nocomp_send_context) = PSMX_NOCOMP_SEND_CONTEXT; PSMX_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv; PSMX_CTXT_TYPE(&ep_priv->nocomp_recv_context) = PSMX_NOCOMP_RECV_CONTEXT; PSMX_CTXT_EP(&ep_priv->nocomp_recv_context) = ep_priv; if (ep_cap & FI_TAGGED) ep_priv->ep.tagged = &psmx_tagged_ops; if (ep_cap & FI_MSG) ep_priv->ep.msg = &psmx_msg_ops; if ((ep_cap & FI_MSG) && psmx_env.am_msg) ep_priv->ep.msg = &psmx_msg2_ops; if (ep_cap & FI_RMA) ep_priv->ep.rma = &psmx_rma_ops; if (ep_cap & FI_ATOMICS) ep_priv->ep.atomic = &psmx_atomic_ops; ep_priv->caps = ep_cap; err = psmx_domain_enable_ep(domain_priv, ep_priv); if (err) { free(ep_priv); return err; } psmx_domain_acquire(domain_priv); if (info) { if (info->tx_attr) ep_priv->tx_flags = info->tx_attr->op_flags; if (info->rx_attr) ep_priv->rx_flags = info->rx_attr->op_flags; } psmx_ep_optimize_ops(ep_priv); *ep = &ep_priv->ep; return 0; }