/*
 * Tagged inject, specialized for op_flags == 0 and an FI_AV_MAP address vector.
 *
 * dest_addr is decoded directly with PSMX2_ADDR_TO_EP / PSMX2_ADDR_TO_VL; no
 * address-vector lookup is performed. The message goes out through
 * psm2_mq_send2() on the domain's matched queue. No CQ entry is produced;
 * only the bound send counter (if any) is incremented.
 *
 * Returns 0 on success, -FI_EMSGSIZE when len exceeds PSMX2_INJECT_SIZE, or
 * the psmx2_errno() translation of the PSM2 error.
 */
static ssize_t
psmx2_tagged_inject_no_flag_av_map(struct fid_ep *ep, const void *buf, size_t len,
				   fi_addr_t dest_addr, uint64_t tag)
{
	struct psmx2_fid_ep *ep_priv;
	psm2_epaddr_t peer;
	psm2_mq_tag_t psm2_tag;
	uint8_t dest_vl;
	uint32_t tag32;
	int rc;

	if (len > PSMX2_INJECT_SIZE)
		return -FI_EMSGSIZE;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	/* With FI_AV_MAP the fi_addr_t itself carries the epaddr and vlane. */
	peer = PSMX2_ADDR_TO_EP(dest_addr);
	dest_vl = PSMX2_ADDR_TO_VL(dest_addr);

	/* No flag bits in the 32-bit tag word: source vlane + destination vlane only. */
	tag32 = PSMX2_TAG32(0, ep_priv->vlane, dest_vl);
	PSMX2_SET_TAG(psm2_tag, tag, tag32);

	rc = psm2_mq_send2(ep_priv->domain->psm2_mq, peer, 0,
			   &psm2_tag, buf, len);
	if (rc == PSM2_OK) {
		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr);
		return 0;
	}

	return psmx2_errno(rc);
}
/*
 * Tagged inject, specialized for op_flags == 0 and an FI_AV_TABLE address
 * vector.
 *
 * dest_addr is either a scalable-endpoint address (recognized by
 * PSMX2_SEP_ADDR_TEST and translated with psmx2_av_translate_sep) or an
 * index into the address vector's epaddrs[] / vlanes[] tables. The message
 * is sent with psm2_mq_send2() on the endpoint's trx_ctxt matched queue. No
 * CQ entry is produced; only the bound send counter (if any) is incremented.
 *
 * Returns 0 on success, -FI_EMSGSIZE when len exceeds PSMX2_INJECT_SIZE,
 * -FI_EINVAL when no address vector is bound or the table index is out of
 * range, or the psmx2_errno() translation of the PSM2 error.
 */
static ssize_t
psmx2_tagged_inject_no_flag_av_table(struct fid_ep *ep, const void *buf, size_t len,
				     fi_addr_t dest_addr, uint64_t tag)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_tag_t psm2_tag;
	uint32_t tag32;
	int err;
	size_t idx;

	if (len > PSMX2_INJECT_SIZE)
		return -FI_EMSGSIZE;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	/*
	 * Both translation paths below need the address vector. The table
	 * path used to dereference it even when it was NULL; fail cleanly
	 * instead.
	 */
	av = ep_priv->av;
	if (!av)
		return -FI_EINVAL;

	if (PSMX2_SEP_ADDR_TEST(dest_addr)) {
		psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->trx_ctxt, dest_addr);
		vlane = 0;
	} else {
		idx = (size_t)dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
	}

	/* No flag bits in the 32-bit tag word: source vlane + destination vlane only. */
	tag32 = PSMX2_TAG32(0, ep_priv->vlane, vlane);
	PSMX2_SET_TAG(psm2_tag, tag, tag32);

	err = psm2_mq_send2(ep_priv->trx_ctxt->psm2_mq, psm2_epaddr, 0,
			    &psm2_tag, buf, len);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	if (ep_priv->send_cntr)
		psmx2_cntr_inc(ep_priv->send_cntr);

	return 0;
}
/*
 * Send the concatenation of an iovec array as one untagged message.
 *
 * Two wire protocols are used, chosen by the total payload size:
 *   - PSMX2_IOV_PROTO_PACK:  total_len <= PSMX2_IOV_BUF_SIZE. The segments
 *     are copied back-to-back into req->buf and sent as a single message.
 *   - PSMX2_IOV_PROTO_MULTI: a header (sequence number, segment count, total
 *     length, per-segment lengths) is sent first from req->buf, followed by
 *     one message per non-empty segment tagged with the sequence number.
 *
 * With FI_TRIGGER the operation is only queued on the threshold counter
 * named by the fi_triggered_context passed in `context`.
 *
 * With FI_INJECT the message is sent synchronously from the packed buffer
 * and the request is released before returning, so only payloads that fit
 * the PACK protocol and PSMX2_INJECT_SIZE are accepted.
 *
 * Returns 0 on success, -FI_ENOMEM on allocation failure, -FI_EINVAL for an
 * out-of-range FI_AV_TABLE index, -FI_EMSGSIZE for an oversize inject, or
 * the psmx2_errno() translation of a PSM2 error.
 *
 * NOTE(review): the number of non-empty segments is not bounded here before
 * their lengths are written to req->iov_info.len[], and total_len and the
 * segment lengths are narrowed to uint32_t; confirm callers enforce limits.
 */
ssize_t psmx2_sendv_generic(struct fid_ep *ep, const struct iovec *iov, void *desc,
			    size_t count, fi_addr_t dest_addr, void *context,
			    uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t tag32, tag32_base;
	struct fi_context *fi_context;
	int send_flag = 0;
	int err;
	size_t idx;
	int no_completion = 0;
	struct psmx2_cq_event *event;
	size_t real_count;
	size_t len, total_len;
	char *p;
	uint32_t *q;
	size_t i;
	struct psmx2_sendv_request *req;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_SENDV;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->sendv.ep = ep;
		trigger->sendv.iov = iov;
		trigger->sendv.desc = desc;
		trigger->sendv.count = count;
		trigger->sendv.dest_addr = dest_addr;
		trigger->sendv.context = context;
		/* The deferred call must not re-enter the trigger path. */
		trigger->sendv.flags = flags & ~FI_TRIGGER;
		trigger->sendv.data = data;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	/* Only non-empty segments count towards the message. */
	total_len = 0;
	real_count = 0;
	for (i = 0; i < count; i++) {
		if (iov[i].iov_len) {
			total_len += iov[i].iov_len;
			real_count++;
		}
	}

	req = malloc(sizeof(*req));
	if (!req)
		return -FI_ENOMEM;

	if (total_len <= PSMX2_IOV_BUF_SIZE) {
		req->iov_protocol = PSMX2_IOV_PROTO_PACK;
		p = req->buf;
		for (i = 0; i < count; i++) {
			if (iov[i].iov_len) {
				memcpy(p, iov[i].iov_base, iov[i].iov_len);
				p += iov[i].iov_len;
			}
		}

		tag32_base = PSMX2_MSG_BIT;
		len = total_len;
	} else {
		req->iov_protocol = PSMX2_IOV_PROTO_MULTI;
		req->iov_done = 0;
		req->iov_info.seq_num = (++ep_priv->iov_seq_num) %
					PSMX2_IOV_MAX_SEQ_NUM + 1;
		req->iov_info.count = (uint32_t)real_count;
		req->iov_info.total_len = (uint32_t)total_len;

		q = req->iov_info.len;
		for (i = 0; i < count; i++) {
			if (iov[i].iov_len)
				*q++ = (uint32_t)iov[i].iov_len;
		}

		tag32_base = PSMX2_MSG_BIT | PSMX2_IOV_BIT;
		/* Header only: three fixed words plus one length per segment. */
		len = (3 + real_count) * sizeof(uint32_t);
	}

	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = (size_t)dest_addr;
		if (idx >= av->last) {
			free(req);
			return -FI_EINVAL;
		}

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
	} else {
		psm2_epaddr = PSMX2_ADDR_TO_EP(dest_addr);
		vlane = PSMX2_ADDR_TO_VL(dest_addr);
	}

	tag32 = PSMX2_TAG32(tag32_base, ep_priv->vlane, vlane);
	if (flags & FI_REMOTE_CQ_DATA)
		tag32 |= PSMX2_IMM_BIT;

	PSMX2_SET_TAG(psm2_tag, data, tag32);

	if ((flags & PSMX2_NO_COMPLETION) ||
	    (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)))
		no_completion = 1;

	if (flags & FI_INJECT) {
		/*
		 * The inject path sends req->buf once and frees the request.
		 * In the MULTI protocol req->buf holds only the header and
		 * `len` is the header length, so checking `len` would let an
		 * oversize payload through and then never transmit it. Check
		 * the payload size and require the packed protocol.
		 */
		if (total_len > PSMX2_INJECT_SIZE ||
		    req->iov_protocol != PSMX2_IOV_PROTO_PACK) {
			free(req);
			return -FI_EMSGSIZE;
		}

		err = psm2_mq_send2(ep_priv->domain->psm2_mq, psm2_epaddr,
				    send_flag, &psm2_tag, req->buf, len);

		free(req);

		if (err != PSM2_OK)
			return psmx2_errno(err);

		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr);

		if (ep_priv->send_cq && !no_completion) {
			event = psmx2_cq_create_event(
					ep_priv->send_cq,
					context, NULL, flags, len,
					(uint64_t) data,
					0 /* tag */,
					0 /* olen */,
					0 /* err */);

			if (event)
				psmx2_cq_enqueue_event(ep_priv->send_cq, event);
			else
				return -FI_ENOMEM;
		}

		return 0;
	}

	/* Asynchronous path: the request carries its own PSM2 contexts. */
	req->no_completion = no_completion;
	req->user_context = context;
	req->comp_flag = FI_MSG;

	fi_context = &req->fi_context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_SENDV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = req;
	PSMX2_CTXT_EP(fi_context) = ep_priv;

	err = psm2_mq_isend2(ep_priv->domain->psm2_mq, psm2_epaddr,
			     send_flag, &psm2_tag, req->buf, len,
			     (void *)fi_context, &psm2_req);

	if (err != PSM2_OK) {
		free(req);
		return psmx2_errno(err);
	}

	PSMX2_CTXT_REQ(fi_context) = psm2_req;

	if (req->iov_protocol == PSMX2_IOV_PROTO_MULTI) {
		fi_context = &req->fi_context_iov;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_IOV_SEND_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = req;
		PSMX2_CTXT_EP(fi_context) = ep_priv;

		/* Payload messages drop the IOV bit and carry the sequence number. */
		tag32 &= ~PSMX2_IOV_BIT;
		PSMX2_TAG32_SET_SEQ(tag32, req->iov_info.seq_num);
		PSMX2_SET_TAG(psm2_tag, data, tag32);

		for (i = 0; i < count; i++) {
			if (iov[i].iov_len) {
				err = psm2_mq_isend2(ep_priv->domain->psm2_mq,
						     psm2_epaddr, send_flag, &psm2_tag,
						     iov[i].iov_base, iov[i].iov_len,
						     (void *)fi_context, &psm2_req);
				/*
				 * req is not freed here: the header send posted
				 * above still references it.
				 */
				if (err != PSM2_OK)
					return psmx2_errno(err);
			}
		}
	}

	return 0;
}
/*
 * Send one contiguous buffer as an untagged message.
 *
 * Paths, in the order they are tested:
 *   - FI_TRIGGER: queue the operation on the threshold counter described by
 *     the fi_triggered_context passed in `context` and return.
 *   - FI_INJECT:  send synchronously with psm2_mq_send2(), bump the send
 *     counter and, unless completions are suppressed, queue a CQ event.
 *   - otherwise:  post with psm2_mq_isend2(), using the caller's context
 *     when one is given, or the endpoint's shared no-completion context when
 *     completions are suppressed and no context was supplied.
 *
 * Returns 0 on success, -FI_ENOMEM, -FI_EINVAL (bad FI_AV_TABLE index, or a
 * missing context when a completion is required), -FI_EMSGSIZE (oversize
 * inject), or the psmx2_errno() translation of a PSM2 error.
 */
ssize_t psmx2_send_generic(struct fid_ep *ep, const void *buf, size_t len,
			   void *desc, fi_addr_t dest_addr, void *context,
			   uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *av;
	struct psmx2_cq_event *event;
	struct fi_context *fi_context;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t tag32;
	uint8_t vlane;
	size_t idx;
	int send_flag = 0;
	int no_completion;
	int err;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *ctxt = context;
		struct psmx2_trigger *trigger = calloc(1, sizeof(*trigger));

		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_SEND;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->send.ep = ep;
		trigger->send.buf = buf;
		trigger->send.len = len;
		trigger->send.desc = desc;
		trigger->send.dest_addr = dest_addr;
		trigger->send.context = context;
		/* Strip FI_TRIGGER so the deferred call takes the normal path. */
		trigger->send.flags = flags & ~FI_TRIGGER;
		trigger->send.data = data;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	/* Resolve the destination: direct decode unless an FI_AV_TABLE is bound. */
	av = ep_priv->av;
	if (!av || av->type != FI_AV_TABLE) {
		psm2_epaddr = PSMX2_ADDR_TO_EP(dest_addr);
		vlane = PSMX2_ADDR_TO_VL(dest_addr);
	} else {
		idx = (size_t)dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
	}

	tag32 = PSMX2_TAG32(PSMX2_MSG_BIT, ep_priv->vlane, vlane);
	if (flags & FI_REMOTE_CQ_DATA)
		tag32 |= PSMX2_IMM_BIT;
	PSMX2_SET_TAG(psm2_tag, data, tag32);

	no_completion = (flags & PSMX2_NO_COMPLETION) ||
			(ep_priv->send_selective_completion &&
			 !(flags & FI_COMPLETION));

	if (flags & FI_INJECT) {
		if (len > PSMX2_INJECT_SIZE)
			return -FI_EMSGSIZE;

		err = psm2_mq_send2(ep_priv->domain->psm2_mq, psm2_epaddr,
				    send_flag, &psm2_tag, buf, len);
		if (err != PSM2_OK)
			return psmx2_errno(err);

		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr);

		if (!ep_priv->send_cq || no_completion)
			return 0;

		event = psmx2_cq_create_event(
				ep_priv->send_cq,
				context, (void *)buf, flags, len,
				(uint64_t) data,
				0 /* tag */,
				0 /* olen */,
				0 /* err */);
		if (!event)
			return -FI_ENOMEM;

		psmx2_cq_enqueue_event(ep_priv->send_cq, event);
		return 0;
	}

	if (context) {
		/* A caller-supplied context is always used, even without completion. */
		fi_context = context;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_SEND_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = (void *)buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
	} else if (no_completion) {
		fi_context = &ep_priv->nocomp_send_context;
	} else {
		return -FI_EINVAL;
	}

	err = psm2_mq_isend2(ep_priv->domain->psm2_mq, psm2_epaddr, send_flag,
			     &psm2_tag, buf, len, (void *)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	/* Only a caller-owned context records the PSM2 request handle. */
	if (context)
		PSMX2_CTXT_REQ(fi_context) = psm2_req;

	return 0;
}
static ssize_t psmx2_rma_self(int am_cmd, struct psmx2_fid_ep *ep, void *buf, size_t len, void *desc, uint64_t addr, uint64_t key, void *context, uint64_t flags, uint64_t data) { struct psmx2_fid_mr *mr; struct psmx2_cq_event *event; struct psmx2_fid_cntr *cntr = NULL; struct psmx2_fid_cntr *mr_cntr = NULL; struct psmx2_fid_cq *cq = NULL; int no_event; int err = 0; int op_error = 0; int access; uint8_t *dst, *src; uint64_t cq_flags; struct iovec *iov = buf; size_t iov_count = len; int i; switch (am_cmd) { case PSMX2_AM_REQ_WRITE: access = FI_REMOTE_WRITE; cq_flags = FI_WRITE | FI_RMA; break; case PSMX2_AM_REQ_WRITEV: access = FI_REMOTE_WRITE; cq_flags = FI_WRITE | FI_RMA; len = 0; for (i=0; i<iov_count; i++) len += iov[i].iov_len; break; case PSMX2_AM_REQ_READ: access = FI_REMOTE_READ; cq_flags = FI_READ | FI_RMA; break; case PSMX2_AM_REQ_READV: access = FI_REMOTE_READ; cq_flags = FI_READ | FI_RMA; len = 0; for (i=0; i<iov_count; i++) len += iov[i].iov_len; break; default: return -FI_EINVAL; } mr = psmx2_mr_get(ep->domain, key); op_error = mr ? 
psmx2_mr_validate(mr, addr, len, access) : -FI_EINVAL; if (!op_error) { addr += mr->offset; switch (am_cmd) { case PSMX2_AM_REQ_WRITE: cntr = ep->remote_write_cntr; if (flags & FI_REMOTE_CQ_DATA) cq = ep->recv_cq; if (mr->cntr != cntr) mr_cntr = mr->cntr; memcpy((void *)addr, buf, len); break; case PSMX2_AM_REQ_WRITEV: cntr = ep->remote_write_cntr; if (flags & FI_REMOTE_CQ_DATA) cq = ep->recv_cq; if (mr->cntr != cntr) mr_cntr = mr->cntr; dst = (void *)addr; for (i=0; i<iov_count; i++) if (iov[i].iov_len) { memcpy(dst, iov[i].iov_base, iov[i].iov_len); dst += iov[i].iov_len; } break; case PSMX2_AM_REQ_READ: cntr = ep->remote_read_cntr; memcpy(buf, (void *)addr, len); break; case PSMX2_AM_REQ_READV: cntr = ep->remote_read_cntr; src = (void *)addr; for (i=0; i<iov_count; i++) if (iov[i].iov_len) { memcpy(iov[i].iov_base, src, iov[i].iov_len); src += iov[i].iov_len; } break; } if (cq) { event = psmx2_cq_create_event( cq, 0, /* context */ (void *)addr, FI_REMOTE_WRITE | FI_RMA | FI_REMOTE_CQ_DATA, len, data, 0, /* tag */ 0, /* olen */ 0 /* err */); if (event) psmx2_cq_enqueue_event(cq, event); else err = -FI_ENOMEM; } if (cntr) psmx2_cntr_inc(cntr); if (mr_cntr) psmx2_cntr_inc(mr_cntr); } no_event = (flags & PSMX2_NO_COMPLETION) || (ep->send_selective_completion && !(flags & FI_COMPLETION)); if (ep->send_cq && (!no_event || op_error)) { event = psmx2_cq_create_event( ep->send_cq, context, (void *)buf, cq_flags, len, 0, /* data */ 0, /* tag */ 0, /* olen */ op_error); if (event) psmx2_cq_enqueue_event(ep->send_cq, event); else err = -FI_ENOMEM; } switch (am_cmd) { case PSMX2_AM_REQ_WRITE: case PSMX2_AM_REQ_WRITEV: if (ep->write_cntr) psmx2_cntr_inc(ep->write_cntr); break; case PSMX2_AM_REQ_READ: case PSMX2_AM_REQ_READV: if (ep->read_cntr) psmx2_cntr_inc(ep->read_cntr); break; } return err; }
/*
 * Active-message handler for the RMA protocol.
 *
 * args[0].u32w0 carries the opcode plus the PSMX2_AM_EOM and PSMX2_AM_DATA
 * flag bits; the remaining arguments depend on the opcode:
 *
 *   PSMX2_AM_REQ_WRITE       copy the `len` bytes at `src` into the target
 *                            region; on the last chunk (EOM) raise the
 *                            remote-CQ-data event and counters; reply on EOM
 *                            or on error.
 *   PSMX2_AM_REQ_WRITE_LONG  validate, then queue a request describing the
 *                            write via psmx2_am_enqueue_rma(); reply
 *                            immediately only on validation failure.
 *   PSMX2_AM_REQ_READ        reply with the requested bytes (or an error and
 *                            no payload).
 *   PSMX2_AM_REQ_READ_LONG   validate, then queue a request describing the
 *                            read; reply immediately only on failure.
 *   PSMX2_AM_REP_WRITE       initiator side: record the status and, on EOM,
 *                            complete and free the originating request.
 *   PSMX2_AM_REP_READ        initiator side: copy the returned chunk into the
 *                            user buffer or iov, and complete the request on
 *                            EOM or once all bytes have arrived.
 *
 * For request opcodes hctx is the receiving struct psmx2_trx_ctxt; for
 * reply opcodes args[1].u64 is the pointer to the originating request.
 *
 * Returns 0, -FI_ENOMEM when an event or request cannot be allocated,
 * -FI_EINVAL for an unknown opcode, or the result of psm2_am_reply_short().
 */
int psmx2_am_rma_handler(psm2_am_token_t token, psm2_amarg_t *args, int nargs,
			 void *src, uint32_t len, void *hctx)
{
	psm2_amarg_t rep_args[8];
	uint8_t *rma_addr;
	ssize_t rma_len;
	uint64_t key;
	int err = 0;
	int op_error = 0;
	int cmd, eom, has_data;
	struct psmx2_am_request *req;
	struct psmx2_cq_event *event;
	uint64_t offset;
	struct psmx2_fid_mr *mr;
	psm2_epaddr_t epaddr;
	struct psmx2_trx_ctxt *rx;

	psm2_am_get_source(token, &epaddr);

	cmd = PSMX2_AM_GET_OP(args[0].u32w0);
	eom = args[0].u32w0 & PSMX2_AM_EOM;
	has_data = args[0].u32w0 & PSMX2_AM_DATA;

	switch (cmd) {
	case PSMX2_AM_REQ_WRITE:
		rx = (struct psmx2_trx_ctxt *)hctx;
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(rx->domain, key);
		/* Each chunk is validated with its own length, not rma_len. */
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, len, FI_REMOTE_WRITE) :
			-FI_EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
			memcpy(rma_addr, src, len);
			if (eom) {
				if (rx->ep->recv_cq && has_data) {
					/* TODO: report the addr/len of the whole write */
					event = psmx2_cq_create_event(
							rx->ep->recv_cq,
							0, /* context */
							rma_addr,
							FI_REMOTE_WRITE | FI_RMA | FI_REMOTE_CQ_DATA,
							rma_len,
							args[4].u64,
							0, /* tag */
							0, /* olen */
							0);

					if (event)
						psmx2_cq_enqueue_event(rx->ep->recv_cq, event);
					else
						err = -FI_ENOMEM;
				}

				if (rx->ep->remote_write_cntr)
					psmx2_cntr_inc(rx->ep->remote_write_cntr);

				/* Skip the MR counter when it is the same object. */
				if (mr->cntr && mr->cntr != rx->ep->remote_write_cntr)
					psmx2_cntr_inc(mr->cntr);
			}
		}
		if (eom || op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
						  rep_args, 2, NULL, 0, 0,
						  NULL, NULL );
		}
		break;

	case PSMX2_AM_REQ_WRITE_LONG:
		rx = (struct psmx2_trx_ctxt *)hctx;
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(rx->domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_WRITE) :
			-FI_EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
						  rep_args, 2, NULL, 0, 0,
						  NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		req = psmx2_am_request_alloc(rx);
		if (!req) {
			err = -FI_ENOMEM;
		} else {
			req->ep = rx->ep;
			req->op = args[0].u32w0;
			req->write.addr = (uint64_t)rma_addr;
			req->write.len = rma_len;
			req->write.key = key;
			req->write.context = (void *)args[1].u64;
			req->write.peer_addr = (void *)epaddr;
			req->write.data = has_data ? args[4].u64 : 0;
			/*
			 * This statement used to end in ',' and only worked
			 * through the comma operator; it is now a proper
			 * statement of its own.
			 */
			req->cq_flags = FI_REMOTE_WRITE | FI_RMA |
					(has_data ? FI_REMOTE_CQ_DATA : 0);
			PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_REMOTE_WRITE_CONTEXT;
			PSMX2_CTXT_USER(&req->fi_context) = mr;
			psmx2_am_enqueue_rma(rx, req);
		}
		break;

	case PSMX2_AM_REQ_READ:
		rx = (struct psmx2_trx_ctxt *)hctx;
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		offset = args[4].u64;
		mr = psmx2_mr_get(rx->domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_READ) :
			-FI_EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
		} else {
			/* On failure the reply carries the error and no payload. */
			rma_addr = NULL;
			rma_len = 0;
		}

		rep_args[0].u32w0 = PSMX2_AM_REP_READ | eom;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		rep_args[2].u64 = offset;
		err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
					  rep_args, 3, rma_addr, rma_len, 0,
					  NULL, NULL );

		if (eom && !op_error) {
			if (rx->ep->remote_read_cntr)
				psmx2_cntr_inc(rx->ep->remote_read_cntr);
		}
		break;

	case PSMX2_AM_REQ_READ_LONG:
		rx = (struct psmx2_trx_ctxt *)hctx;
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(rx->domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_READ) :
			-FI_EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_READ | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			rep_args[2].u64 = 0;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
						  rep_args, 3, NULL, 0, 0,
						  NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		req = psmx2_am_request_alloc(rx);
		if (!req) {
			err = -FI_ENOMEM;
		} else {
			req->ep = rx->ep;
			req->op = args[0].u32w0;
			req->read.addr = (uint64_t)rma_addr;
			req->read.len = rma_len;
			req->read.key = key;
			req->read.context = (void *)args[1].u64;
			req->read.peer_addr = (void *)epaddr;
			PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_REMOTE_READ_CONTEXT;
			PSMX2_CTXT_USER(&req->fi_context) = mr;
			psmx2_am_enqueue_rma(rx, req);
		}
		break;

	case PSMX2_AM_REP_WRITE:
		req = (struct psmx2_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX2_AM_REQ_WRITE);
		op_error = (int)args[0].u32w1;
		/* Keep the first error seen across the replies. */
		if (!req->error)
			req->error = op_error;
		if (eom) {
			if (req->ep->send_cq && (!req->no_event || req->error)) {
				event = psmx2_cq_create_event(
						req->ep->send_cq,
						req->write.context,
						req->write.buf,
						req->cq_flags,
						req->write.len,
						0, /* data */
						0, /* tag */
						0, /* olen */
						req->error);
				if (event)
					psmx2_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->write_cntr)
				psmx2_cntr_inc(req->ep->write_cntr);

			free(req->tmpbuf);
			psmx2_am_request_free(req->ep->tx, req);
		}
		break;

	case PSMX2_AM_REP_READ:
		req = (struct psmx2_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX2_AM_REQ_READ || req->op == PSMX2_AM_REQ_READV);
		op_error = (int)args[0].u32w1;
		offset = args[2].u64;
		/* Keep the first error seen across the replies. */
		if (!req->error)
			req->error = op_error;
		if (!op_error) {
			if (req->op == PSMX2_AM_REQ_READ)
				memcpy(req->read.buf + offset, src, len);
			else
				psmx2_iov_copy(req->iov, req->read.iov_count, offset, src, len);

			req->read.len_read += len;
		}
		if (eom || req->read.len == req->read.len_read) {
			if (!eom)
				FI_INFO(&psmx2_prov, FI_LOG_EP_DATA,
					"readv: short protocol finishes after long protocol.\n");
			if (req->ep->send_cq && (!req->no_event || req->error)) {
				event = psmx2_cq_create_event(
						req->ep->send_cq,
						req->read.context,
						req->read.buf,
						req->cq_flags,
						req->read.len_read,
						0, /* data */
						0, /* tag */
						req->read.len - req->read.len_read,
						req->error);
				if (event)
					psmx2_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->read_cntr)
				psmx2_cntr_inc(req->ep->read_cntr);

			free(req->tmpbuf);
			psmx2_am_request_free(req->ep->tx, req);
		}
		break;

	default:
		err = -FI_EINVAL;
	}
	return err;
}
/*
 * Post a tagged receive.
 *
 * Paths, in the order they are tested:
 *   - FI_PEEK:    delegated to psmx2_tagged_peek_generic().
 *   - FI_TRIGGER: queued on the threshold counter described by the
 *                 fi_triggered_context passed in `context`.
 *   - FI_CLAIM:   receive the message whose PSM2 request handle is stored
 *                 in `context`. With FI_DISCARD the message is received
 *                 into a zero-length buffer, waited for, and reported
 *                 (subject to selective completion) with its tag and source.
 *   - otherwise:  post a matching receive with psm2_mq_irecv2(). With
 *                 FI_DIRECTED_RECV and a specific src_addr the source is
 *                 part of the match; otherwise the source bits are masked.
 *
 * When the endpoint uses selective completion and FI_COMPLETION is not set,
 * an internal operation context is obtained from psmx2_ep_get_op_context()
 * instead of using the caller's `context`.
 *
 * Returns 0 on success, -FI_EINVAL (missing context or bad FI_AV_TABLE
 * index), -FI_ENOMEM, or the psmx2_errno() translation of a PSM2 error.
 */
ssize_t psmx2_tagged_recv_generic(struct fid_ep *ep, void *buf, size_t len,
				  void *desc, fi_addr_t src_addr, uint64_t tag,
				  uint64_t ignore, void *context, uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	uint32_t tag32, tagsel32;
	struct fi_context *fi_context;
	size_t idx;
	int use_internal_context;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_PEEK)
		return psmx2_tagged_peek_generic(ep, buf, len, desc, src_addr,
						 tag, ignore, context, flags);

	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_TRECV;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->trecv.ep = ep;
		trigger->trecv.buf = buf;
		trigger->trecv.len = len;
		trigger->trecv.desc = desc;
		trigger->trecv.src_addr = src_addr;
		trigger->trecv.tag = tag;
		trigger->trecv.ignore = ignore;
		trigger->trecv.context = context;
		/* The deferred call must not re-enter the trigger path. */
		trigger->trecv.flags = flags & ~FI_TRIGGER;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (flags & FI_CLAIM) {
		if (!context)
			return -FI_EINVAL;

		if (flags & FI_DISCARD) {
			psm2_mq_status2_t psm2_status;
			struct psmx2_cq_event *event;

			fi_context = context;
			psm2_req = PSMX2_CTXT_REQ(fi_context);
			/* Zero-length receive: the payload is dropped. */
			err = psm2_mq_imrecv(ep_priv->trx_ctxt->psm2_mq, 0,
					     NULL, 0, context, &psm2_req);
			if (err != PSM2_OK)
				return psmx2_errno(err);

			psm2_mq_wait2(&psm2_req, &psm2_status);

			if (ep_priv->recv_cq &&
			    (!ep_priv->recv_selective_completion || (flags & FI_COMPLETION))) {
				tag = PSMX2_GET_TAG64(psm2_status.msg_tag);
				event = psmx2_cq_create_event(
						ep_priv->recv_cq,
						context,		/* op_context */
						NULL,			/* buf */
						flags|FI_RECV|FI_TAGGED,/* flags */
						0,			/* len */
						0,			/* data */
						tag,			/* tag */
						0,			/* olen */
						0);			/* err */

				if (!event)
					return -FI_ENOMEM;

				vlane = PSMX2_TAG32_GET_SRC(psm2_status.msg_tag.tag2);
				event->source_is_valid = 1;
				event->source = PSMX2_EP_TO_ADDR(psm2_status.msg_peer, vlane);
				event->source_av = ep_priv->av;
				psmx2_cq_enqueue_event(ep_priv->recv_cq, event);
			}

			if (ep_priv->recv_cntr)
				psmx2_cntr_inc(ep_priv->recv_cntr);

			return 0;
		}

		fi_context = context;
		/* Read the stored request handle before the context is reused. */
		psm2_req = PSMX2_CTXT_REQ(fi_context);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_TRECV_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;

		err = psm2_mq_imrecv(ep_priv->trx_ctxt->psm2_mq, 0,
				     buf, len, context, &psm2_req);
		if (err != PSM2_OK)
			return psmx2_errno(err);

		PSMX2_CTXT_REQ(fi_context) = psm2_req;
		return 0;
	}

	use_internal_context = ep_priv->recv_selective_completion &&
			       !(flags & FI_COMPLETION);

	if (!use_internal_context && !context)
		return -FI_EINVAL;

	/*
	 * Resolve and validate the source address BEFORE touching any
	 * operation context. An invalid table index then returns without
	 * having acquired an internal context or modified the caller's.
	 */
	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		if (av && PSMX2_SEP_ADDR_TEST(src_addr)) {
			psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->trx_ctxt, src_addr);
			vlane = 0;
		} else if (av && av->type == FI_AV_TABLE) {
			idx = (size_t)src_addr;
			if (idx >= av->last)
				return -FI_EINVAL;

			psm2_epaddr = av->epaddrs[idx];
			vlane = av->vlanes[idx];
		} else {
			psm2_epaddr = PSMX2_ADDR_TO_EP(src_addr);
			vlane = PSMX2_ADDR_TO_VL(src_addr);
		}
		tag32 = PSMX2_TAG32(0, vlane, ep_priv->vlane);
		tagsel32 = ~PSMX2_IOV_BIT;
	} else {
		/* Any source: mask the source bits out of the match. */
		psm2_epaddr = 0;
		tag32 = PSMX2_TAG32(0, 0, ep_priv->vlane);
		tagsel32 = ~(PSMX2_IOV_BIT | PSMX2_SRC_BITS);
	}

	if (use_internal_context) {
		fi_context = psmx2_ep_get_op_context(ep_priv);
		/*
		 * NOTE(review): presumably this can fail by returning NULL;
		 * the result is dereferenced right away, so guard it.
		 */
		if (!fi_context)
			return -FI_ENOMEM;

		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT_ALLOC;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
	} else {
		fi_context = context;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_TRECV_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_SIZE(fi_context) = len;
	}

	PSMX2_SET_TAG(psm2_tag, tag, tag32);
	PSMX2_SET_TAG(psm2_tagsel, ~ignore, tagsel32);

	/*
	 * NOTE(review): if the post fails while an internal context is in
	 * use, that context is not handed back here; confirm whether a
	 * release call is needed.
	 */
	err = psm2_mq_irecv2(ep_priv->trx_ctxt->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, 0, buf, len,
			     (void *)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	if (fi_context == context)
		PSMX2_CTXT_REQ(fi_context) = psm2_req;

	return 0;
}
/*
 * Send the concatenation of an iovec array as one untagged message.
 *
 *   - FI_TRIGGER: delegated to psmx2_trigger_queue_sendv().
 *   - Exactly one non-empty segment: delegated to psmx2_send_generic().
 *   - total_len <= PSMX2_IOV_BUF_SIZE (PSMX2_IOV_PROTO_PACK): segments are
 *     copied back-to-back into req->buf and sent as a single message.
 *   - otherwise (PSMX2_IOV_PROTO_MULTI): a header (sequence number, segment
 *     count, total length, per-segment lengths) is sent from req->buf,
 *     followed by one PSMX2_TYPE_IOV_PAYLOAD message per non-empty segment.
 *
 * With FI_INJECT the message is sent synchronously from the packed buffer
 * and the request is released before returning, so only payloads that fit
 * the PACK protocol and psmx2_env.inject_size are accepted.
 *
 * Returns 0 on success, -FI_ENOMEM, -FI_EMSGSIZE (oversize inject), or the
 * psmx2_errno() translation of a PSM2 error.
 *
 * NOTE(review): the number of non-empty segments is not bounded here before
 * their lengths are written to req->iov_info.len[], and total_len and the
 * segment lengths are narrowed to uint32_t; confirm callers enforce limits.
 */
ssize_t psmx2_sendv_generic(struct fid_ep *ep, const struct iovec *iov,
			    void **desc, size_t count, fi_addr_t dest_addr,
			    void *context, uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t msg_flags;
	struct fi_context *fi_context;
	int send_flag = 0;
	int err;
	int no_completion = 0;
	struct psmx2_cq_event *event;
	size_t real_count;
	size_t len, total_len;
	char *p;
	uint32_t *q;
	size_t i;
	size_t j = 0;	/* index of the last non-empty segment seen */
	struct psmx2_sendv_request *req;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_sendv(ep, iov, desc, count,
						 dest_addr, context, flags,
						 data);

	/* Only non-empty segments count towards the message. */
	total_len = 0;
	real_count = 0;
	for (i = 0; i < count; i++) {
		if (iov[i].iov_len) {
			total_len += iov[i].iov_len;
			real_count++;
			j = i;
		}
	}

	/* A single real segment needs no packing or header. */
	if (real_count == 1)
		return psmx2_send_generic(ep, iov[j].iov_base, iov[j].iov_len,
					  desc ? desc[j] : NULL, dest_addr,
					  context, flags, data);

	req = malloc(sizeof(*req));
	if (!req)
		return -FI_ENOMEM;

	if (total_len <= PSMX2_IOV_BUF_SIZE) {
		req->iov_protocol = PSMX2_IOV_PROTO_PACK;
		p = req->buf;
		for (i = 0; i < count; i++) {
			if (iov[i].iov_len) {
				memcpy(p, iov[i].iov_base, iov[i].iov_len);
				p += iov[i].iov_len;
			}
		}

		msg_flags = PSMX2_TYPE_MSG;
		len = total_len;
	} else {
		req->iov_protocol = PSMX2_IOV_PROTO_MULTI;
		req->iov_done = 0;
		req->iov_info.seq_num = (++ep_priv->iov_seq_num) %
					PSMX2_IOV_MAX_SEQ_NUM + 1;
		req->iov_info.count = (uint32_t)real_count;
		req->iov_info.total_len = (uint32_t)total_len;

		q = req->iov_info.len;
		for (i = 0; i < count; i++) {
			if (iov[i].iov_len)
				*q++ = (uint32_t)iov[i].iov_len;
		}

		msg_flags = PSMX2_TYPE_MSG | PSMX2_IOV_BIT;
		/* Header only: three fixed words plus one length per segment. */
		len = (3 + real_count) * sizeof(uint32_t);
	}

	av = ep_priv->av;
	assert(av);
	psm2_epaddr = psmx2_av_translate_addr(av, ep_priv->tx, dest_addr, av->type);

	if (flags & FI_REMOTE_CQ_DATA)
		msg_flags |= PSMX2_IMM_BIT;

	PSMX2_SET_TAG(psm2_tag, 0ULL, data, msg_flags);

	if ((flags & PSMX2_NO_COMPLETION) ||
	    (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)))
		no_completion = 1;

	if (flags & FI_INJECT) {
		/*
		 * The inject path sends req->buf once and frees the request.
		 * In the MULTI protocol req->buf holds only the header and
		 * `len` is the header length, so checking `len` would let an
		 * oversize payload through and then never transmit it. Check
		 * the payload size and require the packed protocol.
		 */
		if (total_len > psmx2_env.inject_size ||
		    req->iov_protocol != PSMX2_IOV_PROTO_PACK) {
			free(req);
			return -FI_EMSGSIZE;
		}

		err = psm2_mq_send2(ep_priv->tx->psm2_mq, psm2_epaddr,
				    send_flag, &psm2_tag, req->buf, len);

		free(req);

		if (err != PSM2_OK)
			return psmx2_errno(err);

		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr, 0);

		if (ep_priv->send_cq && !no_completion) {
			event = psmx2_cq_create_event(
					ep_priv->send_cq,
					context, NULL, flags, len,
					(uint64_t) data,
					0 /* tag */,
					0 /* olen */,
					0 /* err */);

			if (event)
				psmx2_cq_enqueue_event(ep_priv->send_cq, event);
			else
				return -FI_ENOMEM;
		}

		return 0;
	}

	/* Asynchronous path: the request carries its own PSM2 contexts. */
	req->no_completion = no_completion;
	req->user_context = context;
	req->comp_flag = FI_MSG;

	fi_context = &req->fi_context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_SENDV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = req;
	PSMX2_CTXT_EP(fi_context) = ep_priv;

	err = psm2_mq_isend2(ep_priv->tx->psm2_mq, psm2_epaddr,
			     send_flag, &psm2_tag, req->buf, len,
			     (void *)fi_context, &psm2_req);

	if (err != PSM2_OK) {
		free(req);
		return psmx2_errno(err);
	}

	PSMX2_CTXT_REQ(fi_context) = psm2_req;

	if (req->iov_protocol == PSMX2_IOV_PROTO_MULTI) {
		fi_context = &req->fi_context_iov;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_IOV_SEND_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = req;
		PSMX2_CTXT_EP(fi_context) = ep_priv;

		/* Payload messages are tagged with the header's sequence number. */
		PSMX2_SET_TAG(psm2_tag, req->iov_info.seq_num, 0,
			      PSMX2_TYPE_IOV_PAYLOAD);

		for (i = 0; i < count; i++) {
			if (iov[i].iov_len) {
				err = psm2_mq_isend2(ep_priv->tx->psm2_mq,
						     psm2_epaddr, send_flag, &psm2_tag,
						     iov[i].iov_base, iov[i].iov_len,
						     (void *)fi_context, &psm2_req);
				/*
				 * req is not freed here: the header send posted
				 * above still references it.
				 */
				if (err != PSM2_OK)
					return psmx2_errno(err);
			}
		}
	}

	return 0;
}
/*
 * Send one contiguous buffer as an untagged message.
 *
 *   - FI_TRIGGER: delegated to psmx2_trigger_queue_send().
 *   - FI_INJECT:  sent synchronously with psm2_mq_send2(); the send counter
 *     is bumped and, unless completions are suppressed, a CQ event is queued.
 *   - otherwise:  posted with psm2_mq_isend2(), using the endpoint's shared
 *     no-completion context when completions are suppressed and the caller's
 *     context (asserted non-NULL) otherwise.
 *
 * The endpoint must have an address vector bound (asserted).
 *
 * Returns 0 on success, -FI_EMSGSIZE when an inject exceeds
 * psmx2_env.inject_size, -FI_ENOMEM when the inject completion event cannot
 * be created, or the psmx2_errno() translation of a PSM2 error.
 */
ssize_t psmx2_send_generic(struct fid_ep *ep, const void *buf, size_t len,
			   void *desc, fi_addr_t dest_addr, void *context,
			   uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *av;
	struct psmx2_cq_event *event;
	struct fi_context *fi_context;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	int have_data = (flags & FI_REMOTE_CQ_DATA) > 0;
	int send_flag = 0;
	int no_completion;
	int err;

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_send(ep, buf, len, desc, dest_addr,
						context, flags, data);

	av = ep_priv->av;
	assert(av);
	psm2_epaddr = psmx2_av_translate_addr(av, ep_priv->tx, dest_addr, av->type);

	PSMX2_SET_TAG(psm2_tag, 0, data,
		      PSMX2_TYPE_MSG | PSMX2_IMM_BIT_SET(have_data));

	no_completion = (flags & PSMX2_NO_COMPLETION) ||
			(ep_priv->send_selective_completion &&
			 !(flags & FI_COMPLETION));

	if (flags & FI_INJECT) {
		if (len > psmx2_env.inject_size)
			return -FI_EMSGSIZE;

		err = psm2_mq_send2(ep_priv->tx->psm2_mq, psm2_epaddr,
				    send_flag, &psm2_tag, buf, len);
		if (err != PSM2_OK)
			return psmx2_errno(err);

		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr, 0);

		if (!ep_priv->send_cq || no_completion)
			return 0;

		event = psmx2_cq_create_event(
				ep_priv->send_cq,
				context, (void *)buf, flags, len,
				(uint64_t) data,
				0 /* tag */,
				0 /* olen */,
				0 /* err */);
		if (!event)
			return -FI_ENOMEM;

		psmx2_cq_enqueue_event(ep_priv->send_cq, event);
		return 0;
	}

	if (!no_completion) {
		assert(context);
		fi_context = context;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_SEND_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = (void *)buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
	} else {
		/* Suppressed completions share one endpoint-owned context. */
		fi_context = &ep_priv->nocomp_send_context;
	}

	err = psm2_mq_isend2(ep_priv->tx->psm2_mq, psm2_epaddr, send_flag,
			     &psm2_tag, buf, len, (void *)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	/* Only a caller-owned context records the PSM2 request handle. */
	if (fi_context == context)
		PSMX2_CTXT_REQ(fi_context) = psm2_req;

	return 0;
}