/*
 * Tagged receive fast path for: op_flags=0, FI_SELECTIVE_COMPLETION not set,
 * FI_AV_TABLE.
 *
 * The caller's `context` is used directly as the per-operation fi_context and
 * is written to before anything else happens, so it must point to writable
 * storage.  `desc` is not used.
 *
 * When the endpoint has FI_DIRECTED_RECV and `src_addr` is not
 * FI_ADDR_UNSPEC, the source is resolved either through
 * psmx2_av_translate_sep() (scalable endpoint address) or as an index into
 * the AV table.  Otherwise no source epaddr is passed to PSM2 and
 * PSMX2_SRC_BITS is removed from the tag mask.
 *
 * Returns 0 on success, -FI_EINVAL when a directed receive cannot resolve
 * `src_addr` (no AV bound to the endpoint, or table index out of range), or
 * the psmx2_errno() translation of a psm2_mq_irecv2() failure.
 */
static ssize_t
psmx2_tagged_recv_no_flag_av_table(struct fid_ep *ep, void *buf, size_t len,
				   void *desc, fi_addr_t src_addr,
				   uint64_t tag, uint64_t ignore,
				   void *context)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	uint32_t tag32, tagsel32;
	struct fi_context *fi_context;
	size_t idx;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	fi_context = context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_TRECV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = buf;
	PSMX2_CTXT_EP(fi_context) = ep_priv;
	PSMX2_CTXT_SIZE(fi_context) = len;

	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;

		/*
		 * Both resolution paths below need the AV.  Previously only
		 * the SEP test was guarded, so a missing AV fell through to
		 * the table lookup and dereferenced NULL.
		 */
		if (!av)
			return -FI_EINVAL;

		if (PSMX2_SEP_ADDR_TEST(src_addr)) {
			psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->trx_ctxt, src_addr);
			vlane = 0;
		} else {
			idx = (size_t)src_addr;
			if (idx >= av->last)
				return -FI_EINVAL;

			psm2_epaddr = av->epaddrs[idx];
			vlane = av->vlanes[idx];
		}
		tag32 = PSMX2_TAG32(0, vlane, ep_priv->vlane);
		tagsel32 = ~PSMX2_IOV_BIT;
	} else {
		psm2_epaddr = 0;
		tag32 = PSMX2_TAG32(0, 0, ep_priv->vlane);
		tagsel32 = ~(PSMX2_IOV_BIT | PSMX2_SRC_BITS);
	}

	PSMX2_SET_TAG(psm2_tag, tag, tag32);
	PSMX2_SET_TAG(psm2_tagsel, ~ignore, tagsel32);

	err = psm2_mq_irecv2(ep_priv->trx_ctxt->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, 0, buf, len,
			     (void *)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	/* Remember the PSM2 request handle in the caller's context. */
	PSMX2_CTXT_REQ(fi_context) = psm2_req;
	return 0;
}
/*
 * Tagged receive fast path for: op_flags=0, FI_SELECTIVE_COMPLETION set,
 * FI_AV_TABLE.
 *
 * The per-operation context comes from psmx2_ep_get_op_context() and is
 * marked PSMX2_NOCOMP_RECV_CONTEXT_ALLOC; the caller's `context` and `desc`
 * are not used.
 *
 * The source address is resolved BEFORE the op context is obtained, so the
 * -FI_EINVAL return for an out-of-range table index no longer abandons a
 * context that was just taken from the endpoint.
 *
 * Returns -FI_EINVAL for an out-of-range directed `src_addr`, otherwise the
 * psmx2_errno() translation of the psm2_mq_irecv2() result.
 *
 * NOTE(review): if psm2_mq_irecv2() fails, the op context is still not handed
 * back; doing so needs the matching release helper, which is not visible in
 * this part of the file.
 */
static ssize_t
psmx2_tagged_recv_no_event_av_table(struct fid_ep *ep, void *buf, size_t len,
				    void *desc, fi_addr_t src_addr,
				    uint64_t tag, uint64_t ignore,
				    void *context)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	uint32_t tag32, tagsel32;
	struct fi_context *fi_context;
	size_t idx;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	/* Validate and resolve the source first: this is the only early exit. */
	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		idx = (size_t)src_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
		tag32 = PSMX2_TAG32(0, vlane, ep_priv->vlane);
		tagsel32 = ~PSMX2_IOV_BIT;
	} else {
		psm2_epaddr = 0;
		tag32 = PSMX2_TAG32(0, 0, ep_priv->vlane);
		tagsel32 = ~(PSMX2_IOV_BIT | PSMX2_SRC_BITS);
	}

	PSMX2_SET_TAG(psm2_tag, tag, tag32);
	PSMX2_SET_TAG(psm2_tagsel, ~ignore, tagsel32);

	/* Only now take an op context, once nothing can fail before posting. */
	fi_context = psmx2_ep_get_op_context(ep_priv);
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT_ALLOC;
	PSMX2_CTXT_EP(fi_context) = ep_priv;
	PSMX2_CTXT_USER(fi_context) = buf;
	PSMX2_CTXT_SIZE(fi_context) = len;

	err = psm2_mq_irecv2(ep_priv->domain->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, 0, buf, len,
			     (void *)fi_context, &psm2_req);

	return psmx2_errno(err);
}
/*
 * op_flags=0, FI_SELECTIVE_COMPLETION set, FI_AV_MAP
 *
 * Posts a tagged receive whose per-operation context is taken from
 * psmx2_ep_get_op_context() and marked PSMX2_NOCOMP_RECV_CONTEXT_ALLOC.
 * The caller's `context` and `desc` are not used.
 *
 * For a directed receive (endpoint has FI_DIRECTED_RECV and `src_addr` is
 * not FI_ADDR_UNSPEC) the source is resolved through
 * psmx2_av_translate_sep() for scalable endpoint addresses, or decoded
 * straight from `src_addr` otherwise.  For a non-directed receive no source
 * epaddr is passed and PSMX2_SRC_BITS is removed from the tag mask.
 *
 * Returns the psmx2_errno() translation of the psm2_mq_irecv2() result.
 */
static ssize_t
psmx2_tagged_recv_no_event_av_map(struct fid_ep *ep, void *buf, size_t len,
				  void *desc, fi_addr_t src_addr,
				  uint64_t tag, uint64_t ignore,
				  void *context)
{
	struct psmx2_fid_ep *priv;
	struct psmx2_fid_av *addr_vec;
	struct fi_context *op_ctx;
	psm2_epaddr_t peer = 0;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t match_tag, match_mask;
	uint32_t tag_hi, mask_hi;
	uint8_t src_vl = 0;
	int rc;

	priv = container_of(ep, struct psmx2_fid_ep, ep);

	op_ctx = psmx2_ep_get_op_context(priv);
	PSMX2_CTXT_TYPE(op_ctx) = PSMX2_NOCOMP_RECV_CONTEXT_ALLOC;
	PSMX2_CTXT_EP(op_ctx) = priv;
	PSMX2_CTXT_USER(op_ctx) = buf;
	PSMX2_CTXT_SIZE(op_ctx) = len;

	if (!(priv->caps & FI_DIRECTED_RECV) || src_addr == FI_ADDR_UNSPEC) {
		/* Not directed: leave peer/src_vl at zero, ignore source bits. */
		mask_hi = ~(PSMX2_IOV_BIT | PSMX2_SRC_BITS);
	} else {
		addr_vec = priv->av;
		if (addr_vec && PSMX2_SEP_ADDR_TEST(src_addr)) {
			/* Scalable endpoint address; the source vlane stays 0. */
			peer = psmx2_av_translate_sep(addr_vec, priv->trx_ctxt, src_addr);
		} else {
			peer = PSMX2_ADDR_TO_EP(src_addr);
			src_vl = PSMX2_ADDR_TO_VL(src_addr);
		}
		mask_hi = ~PSMX2_IOV_BIT;
	}
	tag_hi = PSMX2_TAG32(0, src_vl, priv->vlane);

	PSMX2_SET_TAG(match_tag, tag, tag_hi);
	PSMX2_SET_TAG(match_mask, ~ignore, mask_hi);

	rc = psm2_mq_irecv2(priv->trx_ctxt->psm2_mq, peer,
			    &match_tag, &match_mask, 0, buf, len,
			    (void *)op_ctx, &mq_req);

	return psmx2_errno(rc);
}
/*
 * Tagged send fast path for: op_flags=0, FI_SELECTIVE_COMPLETION not set,
 * FI_AV_TABLE.
 *
 * The caller's `context` is used directly as the per-operation fi_context
 * and is written to, so it must point to writable storage.  `desc` is not
 * used.
 *
 * `dest_addr` is resolved through psmx2_av_translate_sep() when it tests as
 * a scalable endpoint address, otherwise it is treated as an index into the
 * AV table.
 *
 * Returns 0 on success, -FI_EINVAL when the destination cannot be resolved
 * (no AV bound to the endpoint, or table index out of range), or the
 * psmx2_errno() translation of a psm2_mq_isend2() failure.
 */
static ssize_t
psmx2_tagged_send_no_flag_av_table(struct fid_ep *ep, const void *buf,
				   size_t len, void *desc,
				   fi_addr_t dest_addr, uint64_t tag,
				   void *context)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t tag32;
	struct fi_context *fi_context;
	size_t idx;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	av = ep_priv->av;

	/*
	 * Both resolution paths below need the AV.  Previously only the SEP
	 * test was guarded, so a missing AV fell through to the table lookup
	 * and dereferenced NULL.
	 */
	if (!av)
		return -FI_EINVAL;

	if (PSMX2_SEP_ADDR_TEST(dest_addr)) {
		psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->trx_ctxt, dest_addr);
		vlane = 0;
	} else {
		idx = (size_t)dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
	}

	tag32 = PSMX2_TAG32(0, ep_priv->vlane, vlane);
	PSMX2_SET_TAG(psm2_tag, tag, tag32);

	fi_context = context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_TSEND_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = (void *)buf;
	PSMX2_CTXT_EP(fi_context) = ep_priv;

	err = psm2_mq_isend2(ep_priv->trx_ctxt->psm2_mq, psm2_epaddr, 0,
			     &psm2_tag, buf, len, (void*)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	/* Remember the PSM2 request handle in the caller's context. */
	PSMX2_CTXT_REQ(fi_context) = psm2_req;
	return 0;
}
/*
 * op_flags=0, FI_SELECTIVE_COMPLETION not set, FI_AV_MAP
 *
 * Posts a tagged receive using the caller's `context` as the per-operation
 * fi_context (it is written to, so it must be writable).  `desc` is not
 * used.
 *
 * For a directed receive (endpoint has FI_DIRECTED_RECV and `src_addr` is
 * not FI_ADDR_UNSPEC) the source epaddr and vlane are decoded from
 * `src_addr`.  Otherwise no source epaddr is passed and PSMX2_SRC_BITS is
 * removed from the tag mask.
 *
 * Returns 0 on success or the psmx2_errno() translation of a
 * psm2_mq_irecv2() failure.
 */
static ssize_t
psmx2_tagged_recv_no_flag_av_map(struct fid_ep *ep, void *buf, size_t len,
				 void *desc, fi_addr_t src_addr,
				 uint64_t tag, uint64_t ignore,
				 void *context)
{
	struct psmx2_fid_ep *priv;
	struct fi_context *op_ctx = context;
	psm2_epaddr_t peer = 0;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t match_tag, match_mask;
	uint32_t tag_hi;
	uint32_t mask_hi = ~(PSMX2_IOV_BIT | PSMX2_SRC_BITS);
	uint8_t src_vl = 0;
	int rc;

	priv = container_of(ep, struct psmx2_fid_ep, ep);

	PSMX2_CTXT_TYPE(op_ctx) = PSMX2_TRECV_CONTEXT;
	PSMX2_CTXT_USER(op_ctx) = buf;
	PSMX2_CTXT_EP(op_ctx) = priv;
	PSMX2_CTXT_SIZE(op_ctx) = len;

	/* Directed receive: narrow the defaults to one source. */
	if ((priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		peer = PSMX2_ADDR_TO_EP(src_addr);
		src_vl = PSMX2_ADDR_TO_VL(src_addr);
		mask_hi = ~PSMX2_IOV_BIT;
	}
	tag_hi = PSMX2_TAG32(0, src_vl, priv->vlane);

	PSMX2_SET_TAG(match_tag, tag, tag_hi);
	PSMX2_SET_TAG(match_mask, ~ignore, mask_hi);

	rc = psm2_mq_irecv2(priv->domain->psm2_mq, peer,
			    &match_tag, &match_mask, 0, buf, len,
			    (void *)op_ctx, &mq_req);
	if (rc != PSM2_OK)
		return psmx2_errno(rc);

	PSMX2_CTXT_REQ(op_ctx) = mq_req;
	return 0;
}
/*
 * op_flags=0, FI_SELECTIVE_COMPLETION not set, FI_AV_MAP
 *
 * Posts a tagged send to the epaddr/vlane decoded from `dest_addr`, using
 * the caller's `context` as the per-operation fi_context (it is written to,
 * so it must be writable).  `desc` is not used.
 *
 * Returns 0 on success or the psmx2_errno() translation of a
 * psm2_mq_isend2() failure.
 */
static ssize_t
psmx2_tagged_send_no_flag_av_map(struct fid_ep *ep, const void *buf,
				 size_t len, void *desc,
				 fi_addr_t dest_addr, uint64_t tag,
				 void *context)
{
	struct psmx2_fid_ep *priv;
	struct fi_context *op_ctx = context;
	psm2_epaddr_t peer;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t send_tag;
	uint32_t tag_hi;
	uint8_t dst_vl;
	int rc;

	priv = container_of(ep, struct psmx2_fid_ep, ep);

	/* With FI_AV_MAP the fi_addr_t itself encodes epaddr and vlane. */
	peer = PSMX2_ADDR_TO_EP(dest_addr);
	dst_vl = PSMX2_ADDR_TO_VL(dest_addr);

	tag_hi = PSMX2_TAG32(0, priv->vlane, dst_vl);
	PSMX2_SET_TAG(send_tag, tag, tag_hi);

	PSMX2_CTXT_TYPE(op_ctx) = PSMX2_TSEND_CONTEXT;
	PSMX2_CTXT_USER(op_ctx) = (void *)buf;
	PSMX2_CTXT_EP(op_ctx) = priv;

	rc = psm2_mq_isend2(priv->domain->psm2_mq, peer, 0,
			    &send_tag, buf, len, (void*)op_ctx, &mq_req);
	if (rc != PSM2_OK)
		return psmx2_errno(rc);

	PSMX2_CTXT_REQ(op_ctx) = mq_req;
	return 0;
}
/*
 * Generic RMA read: fetch `len` bytes from remote (`addr`, `key`) at
 * `src_addr` into `buf`.
 *
 * - FI_TRIGGER in `flags` defers the whole call to
 *   psmx2_trigger_queue_read().
 * - A read whose target epid equals this endpoint's own is handed to
 *   psmx2_rma_self().
 * - With psmx2_env.tagged_rma set and `len` above the AM short-reply limit,
 *   a tagged MQ receive is posted for the data and one READ_LONG request is
 *   sent.
 * - Otherwise the read is issued as a series of READ requests of at most
 *   the short-reply limit each; the final one carries PSMX2_AM_EOM.
 *
 * Returns 0 once the requests are issued, -FI_EINVAL for a NULL `buf` or a
 * zero AV-map address, the error from psmx2_av_check_table_idx(), or
 * -FI_ENOMEM if no request could be allocated.  Return values of the PSM2
 * calls themselves are not checked.
 */
ssize_t psmx2_read_generic(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr,
			   uint64_t addr, uint64_t key, void *context,
			   uint64_t flags)
{
	struct psmx2_fid_ep *priv;
	struct psmx2_fid_av *addr_vec;
	struct psmx2_epaddr_context *peer_ctx;
	struct psmx2_am_request *rd;
	psm2_amarg_t args[8];
	psm2_epaddr_t peer;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t rma_tag, rma_mask;
	size_t table_idx;
	size_t done = 0;
	int max_chunk;
	int rc;

	priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_read(ep, buf, len, desc, src_addr,
						addr, key, context, flags);

	if (!buf)
		return -FI_EINVAL;

	/* Resolve the remote endpoint address. */
	addr_vec = priv->av;
	if (addr_vec && PSMX2_SEP_ADDR_TEST(src_addr)) {
		peer = psmx2_av_translate_sep(addr_vec, priv->tx, src_addr);
	} else if (addr_vec && addr_vec->type == FI_AV_TABLE) {
		table_idx = src_addr;
		rc = psmx2_av_check_table_idx(addr_vec, priv->tx, table_idx);
		if (rc)
			return rc;

		peer = addr_vec->tables[priv->tx->id].epaddrs[table_idx];
	} else {
		if (!src_addr)
			return -FI_EINVAL;

		peer = PSMX2_ADDR_TO_EP(src_addr);
	}

	/* Reading from ourselves takes the self path. */
	peer_ctx = psm2_epaddr_getctxt((void *)peer);
	if (peer_ctx->epid == priv->tx->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_READ, priv, buf, len, desc,
				      addr, key, context, flags, 0);

	rd = psmx2_am_request_alloc(priv->tx);
	if (!rd)
		return -FI_ENOMEM;

	rd->op = PSMX2_AM_REQ_READ;
	rd->read.buf = buf;
	rd->read.len = len;
	rd->read.addr = addr;	/* needed? */
	rd->read.key = key;	/* needed? */
	rd->read.context = context;
	rd->ep = priv;
	rd->cq_flags = FI_READ | FI_RMA;
	PSMX2_CTXT_USER(&rd->fi_context) = context;
	PSMX2_CTXT_EP(&rd->fi_context) = priv;

	if (priv->send_selective_completion && !(flags & FI_COMPLETION)) {
		/* Selective completion without FI_COMPLETION: no CQ event. */
		PSMX2_CTXT_TYPE(&rd->fi_context) = PSMX2_NOCOMP_READ_CONTEXT;
		rd->no_event = 1;
	} else {
		PSMX2_CTXT_TYPE(&rd->fi_context) = PSMX2_READ_CONTEXT;
	}

	max_chunk = priv->tx->psm2_am_param.max_reply_short;

	args[0].u32w0 = 0;

	if (psmx2_env.tagged_rma && len > max_chunk) {
		/*
		 * Long protocol: the receive is posted before the request is
		 * sent; the request pointer is used as the match tag.
		 */
		PSMX2_SET_TAG(rma_tag, (uint64_t)rd, 0, PSMX2_RMA_TYPE_READ);
		PSMX2_SET_MASK(rma_mask, PSMX2_MATCH_ALL, PSMX2_RMA_TYPE_MASK);
		psm2_mq_irecv2(priv->tx->psm2_mq, peer,
			       &rma_tag, &rma_mask, 0, buf, len,
			       (void *)&rd->fi_context, &mq_req);

		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ_LONG);
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)rd;
		args[2].u64 = addr;
		args[3].u64 = key;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 4,
				      NULL, 0, 0, NULL, NULL);

		return 0;
	}

	/* Short protocol: full-sized chunks first, remainder last with EOM. */
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ);
	args[1].u64 = (uint64_t)(uintptr_t)rd;
	args[3].u64 = key;

	for (; len > max_chunk;
	     addr += max_chunk, len -= max_chunk, done += max_chunk) {
		args[0].u32w1 = max_chunk;
		args[2].u64 = addr;
		args[4].u64 = done;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
				      NULL, 0, 0, NULL, NULL);
	}

	PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
	args[0].u32w1 = len;
	args[2].u64 = addr;
	args[4].u64 = done;
	psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
			      NULL, 0, 0, NULL, NULL);

	return 0;
}
/*
 * Generic vectored RMA write: write the `count` segments of `iov`,
 * back to back, to remote (`addr`, `key`) at `dest_addr`.
 *
 * - FI_TRIGGER in `flags` defers the whole call to
 *   psmx2_trigger_queue_writev().
 * - A write whose target epid equals this endpoint's own is handed to
 *   psmx2_rma_self().
 * - Case 1: if the total length fits in one AM short request, the segments
 *   are packed into a temporary buffer (req->tmpbuf) and sent in one message.
 * - Case 2: otherwise segments are sent in order.  With
 *   psmx2_env.tagged_rma, a final segment larger than the AM limit uses a
 *   WRITE_LONG request plus a tagged MQ send (2.1); everything else is
 *   chunked into AM short requests (2.2).  The last piece carries
 *   PSMX2_AM_EOM, and the remote CQ data when FI_REMOTE_CQ_DATA is set.
 *
 * Returns 0 once the requests are issued, -FI_EINVAL for a zero AV-map
 * address, the error from psmx2_av_check_table_idx(), -FI_ENOMEM on
 * allocation failure, or -FI_EMSGSIZE for FI_INJECT data that does not fit
 * in one AM request.  Return values of the PSM2 calls are not checked.
 *
 * Fix relative to the previous revision: a zero-length write no longer
 * reports -FI_ENOMEM on platforms where malloc(0) returns NULL.
 */
ssize_t psmx2_writev_generic(struct fid_ep *ep, const struct iovec *iov,
			     void **desc, size_t count,
			     fi_addr_t dest_addr,
			     uint64_t addr, uint64_t key,
			     void *context, uint64_t flags,
			     uint64_t data)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	struct psmx2_epaddr_context *epaddr_context;
	struct psmx2_am_request *req;
	psm2_amarg_t args[8];
	int nargs;
	int am_flags = PSM2_AM_FLAG_ASYNC;
	int chunk_size;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	size_t idx;
	void *psm2_context;
	int no_event;
	size_t total_len, len, len_sent;
	uint8_t *buf, *p;
	size_t i;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_writev(ep, iov, desc, count,
						  dest_addr, addr, key,
						  context, flags, data);

	/* Resolve the remote endpoint address. */
	av = ep_priv->av;
	if (av && PSMX2_SEP_ADDR_TEST(dest_addr)) {
		psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->tx, dest_addr);
	} else if (av && av->type == FI_AV_TABLE) {
		idx = dest_addr;
		if ((err = psmx2_av_check_table_idx(av, ep_priv->tx, idx)))
			return err;

		psm2_epaddr = av->tables[ep_priv->tx->id].epaddrs[idx];
	} else {
		if (!dest_addr)
			return -FI_EINVAL;

		psm2_epaddr = PSMX2_ADDR_TO_EP(dest_addr);
	}

	/* Writing to ourselves takes the self path. */
	epaddr_context = psm2_epaddr_getctxt((void *)psm2_epaddr);
	if (epaddr_context->epid == ep_priv->tx->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_WRITEV, ep_priv,
				      (void *)iov, count, desc, addr,
				      key, context, flags, data);

	no_event = (flags & PSMX2_NO_COMPLETION) ||
		   (ep_priv->send_selective_completion && !(flags & FI_COMPLETION));

	total_len = 0;
	for (i = 0; i < count; i++)
		total_len += iov[i].iov_len;

	chunk_size = ep_priv->tx->psm2_am_param.max_request_short;

	req = psmx2_am_request_alloc(ep_priv->tx);
	if (!req)
		return -FI_ENOMEM;

	/* Case 1: fit into a AM message, then pack and send */
	if (total_len <= chunk_size) {
		req->tmpbuf = malloc(total_len);
		/*
		 * malloc(0) is allowed to return NULL; only a failed
		 * non-empty allocation is an out-of-memory condition.
		 */
		if (!req->tmpbuf && total_len) {
			psmx2_am_request_free(ep_priv->tx, req);
			return -FI_ENOMEM;
		}

		p = req->tmpbuf;
		for (i = 0; i < count; i++) {
			if (iov[i].iov_len) {
				memcpy(p, iov[i].iov_base, iov[i].iov_len);
				p += iov[i].iov_len;
			}
		}
		buf = req->tmpbuf;
		len = total_len;

		req->no_event = no_event;
		req->op = PSMX2_AM_REQ_WRITE;
		req->write.buf = (void *)buf;
		req->write.len = len;
		req->write.addr = addr;	/* needed? */
		req->write.key = key; 	/* needed? */
		req->write.context = context;
		req->ep = ep_priv;
		req->cq_flags = FI_WRITE | FI_RMA;
		PSMX2_CTXT_USER(&req->fi_context) = context;
		PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

		args[0].u32w0 = 0;
		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE);
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		nargs = 4;
		if (flags & FI_REMOTE_CQ_DATA) {
			PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA | PSMX2_AM_EOM);
			args[4].u64 = data;
			nargs++;
		} else {
			PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
		}
		psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER, args,
				      nargs, (void *)buf, len, am_flags,
				      NULL, NULL);

		return 0;
	}

	/* Anything beyond one AM message cannot be injected. */
	if (flags & FI_INJECT) {
		psmx2_am_request_free(ep_priv->tx, req);
		return -FI_EMSGSIZE;
	}

	PSMX2_CTXT_TYPE(&req->fi_context) = no_event ?
					    PSMX2_NOCOMP_WRITE_CONTEXT :
					    PSMX2_WRITE_CONTEXT;

	req->no_event = no_event;
	req->op = PSMX2_AM_REQ_WRITE;
	req->write.buf = (void *)iov[0].iov_base;
	req->write.len = total_len;
	req->write.addr = addr;	/* needed? */
	req->write.key = key; 	/* needed? */
	req->write.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_WRITE | FI_RMA;
	PSMX2_CTXT_USER(&req->fi_context) = context;
	PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

	/* Case 2: send iov in sequence */
	args[0].u32w0 = 0;

	len_sent = 0;
	for (i = 0; i < count; i++) {
		if (!iov[i].iov_len)
			continue;

		/* Case 2.1: use long protocol for the last segment if it is large */
		if (psmx2_env.tagged_rma && iov[i].iov_len > chunk_size &&
		    len_sent + iov[i].iov_len == total_len) {
			PSMX2_SET_TAG(psm2_tag, (uint64_t)req, 0, PSMX2_RMA_TYPE_WRITE);
			PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE_LONG);
			args[0].u32w1 = iov[i].iov_len;
			args[1].u64 = (uint64_t)req;
			args[2].u64 = addr;
			args[3].u64 = key;
			nargs = 4;
			if (flags & FI_REMOTE_CQ_DATA) {
				PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA);
				args[4].u64 = data;
				nargs++;
			}

			if (flags & FI_DELIVERY_COMPLETE) {
				args[0].u32w0 |= PSMX2_AM_FORCE_ACK;
				psm2_context = NULL;
			} else {
				psm2_context = (void *)&req->fi_context;
			}

			psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
					      args, nargs, NULL, 0, am_flags,
					      NULL, NULL);

			psm2_mq_isend2(ep_priv->tx->psm2_mq, psm2_epaddr, 0,
				       &psm2_tag, iov[i].iov_base,
				       iov[i].iov_len, psm2_context, &psm2_req);

			return 0;
		}

		/* Case 2.2: use short protocol all other segments */
		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE);
		nargs = 4;
		buf = iov[i].iov_base;
		len = iov[i].iov_len;
		while (len > chunk_size) {
			args[0].u32w1 = chunk_size;
			args[1].u64 = (uint64_t)(uintptr_t)req;
			args[2].u64 = addr;
			args[3].u64 = key;
			psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
					      args, nargs, (void *)buf,
					      chunk_size, am_flags, NULL, NULL);
			buf += chunk_size;
			addr += chunk_size;
			len -= chunk_size;
			len_sent += chunk_size;
		}

		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		/* Only the very last piece of the whole write is marked EOM. */
		if (len_sent + len == total_len) {
			if (flags & FI_REMOTE_CQ_DATA) {
				PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA | PSMX2_AM_EOM);
				args[4].u64 = data;
				nargs++;
			} else {
				PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
			}
		}
		psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
				      args, nargs, (void *)buf, len, am_flags,
				      NULL, NULL);

		addr += len;
		len_sent += len;
	}

	return 0;
}
int psmx2_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { struct psmx2_fid_domain *domain_priv; struct psmx2_fid_ep *ep_priv; uint8_t vlane; uint64_t ep_cap; int err = -FI_EINVAL; if (info) ep_cap = info->caps; else ep_cap = FI_TAGGED; domain_priv = container_of(domain, struct psmx2_fid_domain, domain.fid); if (!domain_priv) goto errout; err = psmx2_domain_check_features(domain_priv, ep_cap); if (err) goto errout; err = psmx2_alloc_vlane(domain_priv, &vlane); if (err) goto errout; ep_priv = (struct psmx2_fid_ep *) calloc(1, sizeof *ep_priv); if (!ep_priv) { err = -FI_ENOMEM; goto errout_free_vlane; } ep_priv->ep.fid.fclass = FI_CLASS_EP; ep_priv->ep.fid.context = context; ep_priv->ep.fid.ops = &psmx2_fi_ops; ep_priv->ep.ops = &psmx2_ep_ops; ep_priv->ep.cm = &psmx2_cm_ops; ep_priv->domain = domain_priv; ep_priv->vlane = vlane; PSMX2_CTXT_TYPE(&ep_priv->nocomp_send_context) = PSMX2_NOCOMP_SEND_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv; PSMX2_CTXT_TYPE(&ep_priv->nocomp_recv_context) = PSMX2_NOCOMP_RECV_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_recv_context) = ep_priv; if (ep_cap & FI_TAGGED) ep_priv->ep.tagged = &psmx2_tagged_ops; if (ep_cap & FI_MSG) ep_priv->ep.msg = &psmx2_msg_ops; if (ep_cap & FI_RMA) ep_priv->ep.rma = &psmx2_rma_ops; if (ep_cap & FI_ATOMICS) ep_priv->ep.atomic = &psmx2_atomic_ops; ep_priv->caps = ep_cap; err = psmx2_domain_enable_ep(domain_priv, ep_priv); if (err) goto errout_free_ep; psmx2_domain_acquire(domain_priv); domain_priv->eps[ep_priv->vlane] = ep_priv; if (info) { if (info->tx_attr) ep_priv->flags = info->tx_attr->op_flags; if (info->rx_attr) ep_priv->flags |= info->rx_attr->op_flags; } psmx2_ep_optimize_ops(ep_priv); *ep = &ep_priv->ep; return 0; errout_free_ep: free(ep_priv); errout_free_vlane: psmx2_free_vlane(domain_priv, vlane); errout: return err; }
/*
 * Generic RMA write: write `len` bytes from `buf` to remote (`addr`, `key`)
 * at `dest_addr`.
 *
 * - FI_TRIGGER in `flags` records the operation in a psmx2_trigger attached
 *   to the counter named by the triggered context and returns immediately.
 * - A write whose target epid equals the domain's own is handed to
 *   psmx2_rma_self().
 * - FI_INJECT copies the data into storage allocated right behind the
 *   request and sends from that copy.
 * - With psmx2_env.tagged_rma set and `len` above the AM short-request
 *   limit, a WRITE_LONG request plus a tagged MQ send is used.
 * - Otherwise the data is sent as AM short requests of at most the limit
 *   each; the final one carries PSMX2_AM_EOM (and the remote CQ data when
 *   FI_REMOTE_CQ_DATA is set).
 *
 * Returns 0 once the requests are issued, -FI_EINVAL for a NULL `buf`, an
 * out-of-range table index or a zero AV-map address, -FI_EMSGSIZE for an
 * oversized inject, or -FI_ENOMEM on allocation failure.  Return values of
 * the PSM2 calls are not checked.
 */
ssize_t psmx2_write_generic(struct fid_ep *ep, const void *buf, size_t len,
			    void *desc, fi_addr_t dest_addr,
			    uint64_t addr, uint64_t key, void *context,
			    uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *priv;
	struct psmx2_fid_av *addr_vec;
	struct psmx2_epaddr_context *peer_ctx;
	struct psmx2_am_request *wr;
	psm2_amarg_t args[8];
	psm2_epaddr_t peer;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t rma_tag;
	uint32_t tag_hi;
	uint8_t dst_vl;
	size_t table_idx;
	void *mq_ctx;
	int am_flags = PSM2_AM_FLAG_ASYNC;
	int max_chunk;
	int nargs;
	int suppress_event;

	priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctx = context;
		struct psmx2_trigger *trig;

		trig = calloc(1, sizeof(*trig));
		if (!trig)
			return -FI_ENOMEM;

		trig->op = PSMX2_TRIGGERED_WRITE;
		trig->cntr = container_of(trig_ctx->trigger.threshold.cntr,
					  struct psmx2_fid_cntr, cntr);
		trig->threshold = trig_ctx->trigger.threshold.threshold;
		trig->write.ep = ep;
		trig->write.buf = buf;
		trig->write.len = len;
		trig->write.desc = desc;
		trig->write.dest_addr = dest_addr;
		trig->write.addr = addr;
		trig->write.key = key;
		trig->write.context = context;
		/* Stored without FI_TRIGGER so the replay is not queued again. */
		trig->write.flags = flags & ~FI_TRIGGER;
		trig->write.data = data;

		psmx2_cntr_add_trigger(trig->cntr, trig);
		return 0;
	}

	if (!buf)
		return -FI_EINVAL;

	/* Resolve the remote endpoint address and vlane. */
	addr_vec = priv->av;
	if (addr_vec && addr_vec->type == FI_AV_TABLE) {
		table_idx = dest_addr;
		if (table_idx >= addr_vec->last)
			return -FI_EINVAL;

		peer = addr_vec->epaddrs[table_idx];
		dst_vl = addr_vec->vlanes[table_idx];
	} else {
		if (!dest_addr)
			return -FI_EINVAL;

		peer = PSMX2_ADDR_TO_EP(dest_addr);
		dst_vl = PSMX2_ADDR_TO_VL(dest_addr);
	}

	/* Writing within the same PSM2 endpoint takes the self path. */
	peer_ctx = psm2_epaddr_getctxt((void *)peer);
	if (peer_ctx->epid == priv->domain->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_WRITE,
				      priv, priv->domain->eps[dst_vl],
				      (void *)buf, len, desc, addr, key,
				      context, flags, data);

	suppress_event = (flags & PSMX2_NO_COMPLETION) ||
			 (priv->send_selective_completion && !(flags & FI_COMPLETION));

	if (!(flags & FI_INJECT)) {
		wr = calloc(1, sizeof(*wr));
		if (!wr)
			return -FI_ENOMEM;

		PSMX2_CTXT_TYPE(&wr->fi_context) = suppress_event ?
						   PSMX2_NOCOMP_WRITE_CONTEXT :
						   PSMX2_WRITE_CONTEXT;
	} else {
		if (len > PSMX2_INJECT_SIZE)
			return -FI_EMSGSIZE;

		/* Request header followed by a private copy of the payload. */
		wr = malloc(sizeof(*wr) + len);
		if (!wr)
			return -FI_ENOMEM;

		memset(wr, 0, sizeof(*wr));
		memcpy((uint8_t *)wr + sizeof(*wr), (void *)buf, len);
		buf = (uint8_t *)wr + sizeof(*wr);
	}

	wr->no_event = suppress_event;
	wr->op = PSMX2_AM_REQ_WRITE;
	wr->write.buf = (void *)buf;
	wr->write.len = len;
	wr->write.addr = addr;	/* needed? */
	wr->write.key = key; 	/* needed? */
	wr->write.context = context;
	wr->ep = priv;
	wr->cq_flags = FI_WRITE | FI_RMA;
	PSMX2_CTXT_USER(&wr->fi_context) = context;
	PSMX2_CTXT_EP(&wr->fi_context) = priv;

	max_chunk = psmx2_am_param.max_request_short;

	args[0].u32w0 = 0;
	PSMX2_AM_SET_SRC(args[0].u32w0, priv->vlane);
	PSMX2_AM_SET_DST(args[0].u32w0, dst_vl);

	if (psmx2_env.tagged_rma && len > max_chunk) {
		/* Long protocol: AM request announces, MQ send carries data. */
		tag_hi = PSMX2_TAG32(PSMX2_RMA_BIT, priv->vlane, dst_vl);
		PSMX2_SET_TAG(rma_tag, (uint64_t)wr, tag_hi);
		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE_LONG);
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)wr;
		args[2].u64 = addr;
		args[3].u64 = key;
		nargs = 4;
		if (flags & FI_REMOTE_CQ_DATA) {
			PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA);
			args[4].u64 = data;
			nargs++;
		}

		if (flags & FI_DELIVERY_COMPLETE) {
			/* Ask for an explicit ack; the MQ send gets no context. */
			args[0].u32w0 |= PSMX2_AM_FORCE_ACK;
			mq_ctx = NULL;
		} else {
			mq_ctx = (void *)&wr->fi_context;
		}

		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, nargs,
				      NULL, 0, am_flags, NULL, NULL);

		psm2_mq_isend2(priv->domain->psm2_mq, peer, 0,
			       &rma_tag, buf, len, mq_ctx, &mq_req);

		return 0;
	}

	/* Short protocol: full-sized chunks first, remainder last with EOM. */
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE);
	nargs = 4;
	for (; len > max_chunk; len -= max_chunk) {
		args[0].u32w1 = max_chunk;
		args[1].u64 = (uint64_t)(uintptr_t)wr;
		args[2].u64 = addr;
		args[3].u64 = key;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, nargs,
				      (void *)buf, max_chunk, am_flags,
				      NULL, NULL);
		buf = (const uint8_t *)buf + max_chunk;
		addr += max_chunk;
	}

	args[0].u32w1 = len;
	args[1].u64 = (uint64_t)(uintptr_t)wr;
	args[2].u64 = addr;
	args[3].u64 = key;
	if (flags & FI_REMOTE_CQ_DATA) {
		PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA | PSMX2_AM_EOM);
		args[4].u64 = data;
		nargs++;
	} else {
		PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
	}
	psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, nargs,
			      (void *)buf, len, am_flags, NULL, NULL);

	return 0;
}
/*
 * Generic vectored RMA read: fetch data from remote (`addr`, `key`) at
 * `src_addr` into the `count` segments of `iov`.
 *
 * - FI_TRIGGER in `flags` records the operation in a psmx2_trigger attached
 *   to the counter named by the triggered context and returns immediately.
 * - A read whose target epid equals the domain's own is handed to
 *   psmx2_rma_self().
 * - A copy of the iov array is stored behind the request.
 * - With psmx2_env.tagged_rma set, if the last non-empty segment is larger
 *   than the AM short-reply limit it is fetched with the long protocol
 *   (tagged MQ receive + READ_LONG request); everything before it uses READ
 *   requests of at most the limit each.  PSMX2_AM_EOM is set on the final
 *   short request only when there is no long segment.
 *
 * Returns 0 once the requests are issued, -FI_EINVAL for an out-of-range
 * table index or a zero AV-map address, or -FI_ENOMEM on allocation failure.
 * Return values of the PSM2 calls are not checked.
 */
ssize_t psmx2_readv_generic(struct fid_ep *ep, const struct iovec *iov,
			    void *desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context,
			    uint64_t flags)
{
	struct psmx2_fid_ep *priv;
	struct psmx2_fid_av *addr_vec;
	struct psmx2_epaddr_context *peer_ctx;
	struct psmx2_am_request *rd;
	psm2_amarg_t args[8];
	psm2_epaddr_t peer;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t rma_tag, rma_mask;
	uint32_t tag_hi;
	uint8_t src_vl;
	size_t table_idx;
	size_t done = 0;
	size_t total_len, long_len, short_len;
	void *long_buf = NULL;
	int max_chunk;
	int i;

	priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctx = context;
		struct psmx2_trigger *trig;

		trig = calloc(1, sizeof(*trig));
		if (!trig)
			return -FI_ENOMEM;

		trig->op = PSMX2_TRIGGERED_READV;
		trig->cntr = container_of(trig_ctx->trigger.threshold.cntr,
					  struct psmx2_fid_cntr, cntr);
		trig->threshold = trig_ctx->trigger.threshold.threshold;
		trig->readv.ep = ep;
		trig->readv.iov = iov;
		trig->readv.count = count;
		trig->readv.desc = desc;
		trig->readv.src_addr = src_addr;
		trig->readv.addr = addr;
		trig->readv.key = key;
		trig->readv.context = context;
		/* Stored without FI_TRIGGER so the replay is not queued again. */
		trig->readv.flags = flags & ~FI_TRIGGER;

		psmx2_cntr_add_trigger(trig->cntr, trig);
		return 0;
	}

	/* Resolve the remote endpoint address and vlane. */
	addr_vec = priv->av;
	if (addr_vec && addr_vec->type == FI_AV_TABLE) {
		table_idx = src_addr;
		if (table_idx >= addr_vec->last)
			return -FI_EINVAL;

		peer = addr_vec->epaddrs[table_idx];
		src_vl = addr_vec->vlanes[table_idx];
	} else {
		if (!src_addr)
			return -FI_EINVAL;

		peer = PSMX2_ADDR_TO_EP(src_addr);
		src_vl = PSMX2_ADDR_TO_VL(src_addr);
	}

	/* Reading within the same PSM2 endpoint takes the self path. */
	peer_ctx = psm2_epaddr_getctxt((void *)peer);
	if (peer_ctx->epid == priv->domain->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_READV,
				      priv, priv->domain->eps[src_vl],
				      (void *)iov, count, desc, addr, key,
				      context, flags, 0);

	total_len = 0;
	for (i = 0; i < count; i++)
		total_len += iov[i].iov_len;

	/* The request carries its own copy of the iov array. */
	rd = calloc(1, sizeof(*rd) + count * sizeof(struct iovec));
	if (!rd)
		return -FI_ENOMEM;

	memcpy(rd->iov, iov, count * sizeof(struct iovec));

	rd->op = PSMX2_AM_REQ_READV;
	rd->read.iov_count = count;
	rd->read.len = total_len;
	rd->read.addr = addr;	/* needed? */
	rd->read.key = key;	/* needed? */
	rd->read.context = context;
	rd->ep = priv;
	rd->cq_flags = FI_READ | FI_RMA;
	PSMX2_CTXT_USER(&rd->fi_context) = context;
	PSMX2_CTXT_EP(&rd->fi_context) = priv;

	if (priv->send_selective_completion && !(flags & FI_COMPLETION)) {
		/* Selective completion without FI_COMPLETION: no CQ event. */
		PSMX2_CTXT_TYPE(&rd->fi_context) = PSMX2_NOCOMP_READ_CONTEXT;
		rd->no_event = 1;
	} else {
		PSMX2_CTXT_TYPE(&rd->fi_context) = PSMX2_READ_CONTEXT;
	}

	max_chunk = psmx2_am_param.max_reply_short;

	/*
	 * Only the last non-empty segment is a candidate for the long
	 * protocol, and only if it exceeds the short-reply limit.
	 */
	long_len = 0;
	if (psmx2_env.tagged_rma) {
		i = count - 1;
		while (i >= 0 && !iov[i].iov_len)
			i--;

		if (i >= 0 && iov[i].iov_len > max_chunk) {
			long_buf = iov[i].iov_base;
			long_len = iov[i].iov_len;
		}
	}

	short_len = total_len - long_len;

	/* Use short protocol for all but the last segment (long_len) */
	args[0].u32w0 = 0;
	PSMX2_AM_SET_SRC(args[0].u32w0, priv->vlane);
	PSMX2_AM_SET_DST(args[0].u32w0, src_vl);
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ);
	args[1].u64 = (uint64_t)(uintptr_t)rd;
	args[3].u64 = key;

	for (; short_len > max_chunk;
	     addr += max_chunk, short_len -= max_chunk, done += max_chunk) {
		args[0].u32w1 = max_chunk;
		args[2].u64 = addr;
		args[4].u64 = done;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
				      NULL, 0, 0, NULL, NULL);
	}

	if (!long_len)
		PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
	args[0].u32w1 = short_len;
	args[2].u64 = addr;
	args[4].u64 = done;
	psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
			      NULL, 0, 0, NULL, NULL);

	/* Use the long protocol for the last segment */
	if (long_len) {
		tag_hi = PSMX2_TAG32(PSMX2_RMA_BIT, src_vl, priv->vlane);
		PSMX2_SET_TAG(rma_tag, (uint64_t)rd, tag_hi);
		PSMX2_SET_TAG(rma_mask, -1ULL, -1);
		psm2_mq_irecv2(priv->domain->psm2_mq, peer,
			       &rma_tag, &rma_mask, 0,
			       long_buf, long_len,
			       (void *)&rd->fi_context, &mq_req);

		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ_LONG);
		args[0].u32w1 = long_len;
		args[1].u64 = (uint64_t)rd;
		/* Remote offset of the long segment: just past the short part. */
		args[2].u64 = addr + short_len;
		args[3].u64 = key;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 4,
				      NULL, 0, 0, NULL, NULL);
	}

	return 0;
}
/*
 * Generic vectored message send of the `count` segments of `iov` to
 * `dest_addr`.
 *
 * - FI_TRIGGER in `flags` defers the whole call to
 *   psmx2_trigger_queue_sendv().
 * - If exactly one segment is non-empty, the call is forwarded to
 *   psmx2_send_generic() for that segment.
 * - PACK protocol: when the total length is at most PSMX2_IOV_BUF_SIZE the
 *   segments are copied into the request buffer and sent as one message.
 * - MULTI protocol: otherwise a header describing the segments is sent with
 *   PSMX2_IOV_BIT set, followed by one MQ send per non-empty segment.
 * - FI_INJECT uses a blocking psm2_mq_send2() of the request buffer and
 *   frees the request before returning.
 *
 * Returns 0 on success, -FI_ENOMEM on allocation failure, -FI_EMSGSIZE when
 * FI_INJECT data cannot be injected, or the psmx2_errno() translation of a
 * PSM2 failure.
 *
 * Fix relative to the previous revision: FI_INJECT combined with the MULTI
 * protocol is now rejected with -FI_EMSGSIZE.  Before, the size check looked
 * only at the (small) MULTI header, so the header was sent, the request was
 * freed, and the payload segments it announced were never sent.
 */
ssize_t psmx2_sendv_generic(struct fid_ep *ep, const struct iovec *iov,
			    void **desc, size_t count, fi_addr_t dest_addr,
			    void *context, uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t msg_flags;
	struct fi_context * fi_context;
	int send_flag = 0;
	int err;
	int no_completion = 0;
	struct psmx2_cq_event *event;
	size_t real_count;
	size_t len, total_len;
	char *p;
	uint32_t *q;
	int i, j = 0;
	struct psmx2_sendv_request *req;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_sendv(ep, iov, desc, count,
						 dest_addr, context, flags,
						 data);

	/* Count the non-empty segments; j remembers the last one seen. */
	total_len = 0;
	real_count = 0;
	for (i=0; i<count; i++) {
		if (iov[i].iov_len) {
			total_len += iov[i].iov_len;
			real_count++;
			j = i;
		}
	}

	if (real_count == 1)
		return psmx2_send_generic(ep, iov[j].iov_base, iov[j].iov_len,
					  desc ? desc[j] : NULL, dest_addr,
					  context, flags, data);

	req = malloc(sizeof(*req));
	if (!req)
		return -FI_ENOMEM;

	if (total_len <= PSMX2_IOV_BUF_SIZE) {
		/* PACK: gather everything into the request's own buffer. */
		req->iov_protocol = PSMX2_IOV_PROTO_PACK;
		p = req->buf;
		for (i=0; i<count; i++) {
			if (iov[i].iov_len) {
				memcpy(p, iov[i].iov_base, iov[i].iov_len);
				p += iov[i].iov_len;
			}
		}

		msg_flags = PSMX2_TYPE_MSG;
		len = total_len;
	} else {
		/* MULTI: build the header that announces the segments. */
		req->iov_protocol = PSMX2_IOV_PROTO_MULTI;
		req->iov_done = 0;
		req->iov_info.seq_num = (++ep_priv->iov_seq_num) %
					PSMX2_IOV_MAX_SEQ_NUM + 1;
		req->iov_info.count = (uint32_t)real_count;
		req->iov_info.total_len = (uint32_t)total_len;

		q = req->iov_info.len;
		for (i=0; i<count; i++) {
			if (iov[i].iov_len)
				*q++ = (uint32_t)iov[i].iov_len;
		}

		msg_flags = PSMX2_TYPE_MSG | PSMX2_IOV_BIT;
		len = (3 + real_count) * sizeof(uint32_t);
	}

	av = ep_priv->av;
	assert(av);
	psm2_epaddr = psmx2_av_translate_addr(av, ep_priv->tx, dest_addr, av->type);

	if (flags & FI_REMOTE_CQ_DATA)
		msg_flags |= PSMX2_IMM_BIT;

	PSMX2_SET_TAG(psm2_tag, 0ULL, data, msg_flags);

	if ((flags & PSMX2_NO_COMPLETION) ||
	    (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)))
		no_completion = 1;

	if (flags & FI_INJECT) {
		/*
		 * Only the packed form can be injected: the inject path sends
		 * the request buffer and nothing else.  In the MULTI case
		 * `len` is just the header size, so it must not be used to
		 * decide whether the payload is injectable.
		 */
		if (req->iov_protocol == PSMX2_IOV_PROTO_MULTI ||
		    len > psmx2_env.inject_size) {
			free(req);
			return -FI_EMSGSIZE;
		}

		err = psm2_mq_send2(ep_priv->tx->psm2_mq, psm2_epaddr,
				    send_flag, &psm2_tag, req->buf, len);

		free(req);

		if (err != PSM2_OK)
			return psmx2_errno(err);

		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr, 0);

		if (ep_priv->send_cq && !no_completion) {
			event = psmx2_cq_create_event(
					ep_priv->send_cq,
					context, NULL, flags, len,
					(uint64_t) data,
					0 /* tag */,
					0 /* olen */,
					0 /* err */);

			if (event)
				psmx2_cq_enqueue_event(ep_priv->send_cq, event);
			else
				return -FI_ENOMEM;
		}

		return 0;
	}

	req->no_completion = no_completion;
	req->user_context = context;
	req->comp_flag = FI_MSG;

	fi_context = &req->fi_context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_SENDV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = req;
	PSMX2_CTXT_EP(fi_context) = ep_priv;

	/* Packed payload, or the MULTI header, depending on the protocol. */
	err = psm2_mq_isend2(ep_priv->tx->psm2_mq, psm2_epaddr,
			     send_flag, &psm2_tag, req->buf, len,
			     (void *)fi_context, &psm2_req);

	if (err != PSM2_OK) {
		free(req);
		return psmx2_errno(err);
	}

	PSMX2_CTXT_REQ(fi_context) = psm2_req;

	if (req->iov_protocol == PSMX2_IOV_PROTO_MULTI) {
		fi_context = &req->fi_context_iov;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_IOV_SEND_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = req;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_SET_TAG(psm2_tag, req->iov_info.seq_num, 0, PSMX2_TYPE_IOV_PAYLOAD);
		for (i=0; i<count; i++) {
			if (iov[i].iov_len) {
				err = psm2_mq_isend2(ep_priv->tx->psm2_mq,
						     psm2_epaddr, send_flag,
						     &psm2_tag, iov[i].iov_base,
						     iov[i].iov_len,
						     (void *)fi_context,
						     &psm2_req);
				/*
				 * The request is not freed here: the header
				 * send posted above still refers to it.
				 */
				if (err != PSM2_OK)
					return psmx2_errno(err);
			}
		}
	}

	return 0;
}
/*
 * Generic message receive into `buf`/`len`, optionally restricted to
 * `src_addr`.
 *
 * - FI_TRIGGER in `flags` records the operation in a psmx2_trigger attached
 *   to the counter named by the triggered context and returns immediately.
 * - For a directed receive (endpoint has FI_DIRECTED_RECV and `src_addr` is
 *   not FI_ADDR_UNSPEC) the source is resolved from the AV table or decoded
 *   from `src_addr`; otherwise no source epaddr is passed and
 *   PSMX2_SRC_BITS is removed from the tag mask.
 * - With receive-side selective completion and no FI_COMPLETION, an op
 *   context from psmx2_ep_get_op_context() is used and the caller's
 *   `context` is ignored.
 * - Otherwise `context` must be non-NULL and is used as the fi_context.
 *   With FI_MULTI_RECV a psmx2_multi_recv descriptor is allocated and
 *   attached to it, and the posted length is capped at PSMX2_MAX_MSG_SIZE.
 *
 * Returns 0 on success, -FI_EINVAL for an out-of-range table index or a
 * missing `context`, -FI_ENOMEM on allocation failure, or the
 * psmx2_errno() translation of a psm2_mq_irecv2() failure.
 *
 * Fix relative to the previous revision: the multi-recv descriptor is now
 * freed when psm2_mq_irecv2() fails instead of being leaked.
 *
 * NOTE(review): on the same failure path an op context obtained from
 * psmx2_ep_get_op_context() is still not handed back; doing so needs the
 * matching release helper, which is not visible in this part of the file.
 */
ssize_t psmx2_recv_generic(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, void *context,
			   uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	uint32_t tag32, tagsel32;
	struct fi_context *fi_context;
	struct psmx2_multi_recv *multi_req = NULL;
	int recv_flag = 0;
	size_t idx;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_RECV;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->recv.ep = ep;
		trigger->recv.buf = buf;
		trigger->recv.len = len;
		trigger->recv.desc = desc;
		trigger->recv.src_addr = src_addr;
		trigger->recv.context = context;
		/* Stored without FI_TRIGGER so the replay is not queued again. */
		trigger->recv.flags = flags & ~FI_TRIGGER;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		if (av && av->type == FI_AV_TABLE) {
			idx = (size_t)src_addr;
			if (idx >= av->last)
				return -FI_EINVAL;

			psm2_epaddr = av->epaddrs[idx];
			vlane = av->vlanes[idx];
		} else {
			psm2_epaddr = PSMX2_ADDR_TO_EP(src_addr);
			vlane = PSMX2_ADDR_TO_VL(src_addr);
		}
		tag32 = PSMX2_TAG32(PSMX2_MSG_BIT, vlane, ep_priv->vlane);
		tagsel32 = ~(PSMX2_IOV_BIT | PSMX2_IMM_BIT);
	} else {
		psm2_epaddr = 0;
		tag32 = PSMX2_TAG32(PSMX2_MSG_BIT, 0, ep_priv->vlane);
		tagsel32 = ~(PSMX2_IOV_BIT | PSMX2_IMM_BIT | PSMX2_SRC_BITS);
	}

	PSMX2_SET_TAG(psm2_tag, 0ULL, tag32);
	PSMX2_SET_TAG(psm2_tagsel, 0ULL, tagsel32);

	if (ep_priv->recv_selective_completion && !(flags & FI_COMPLETION)) {
		fi_context = psmx2_ep_get_op_context(ep_priv);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT_ALLOC;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
	} else {
		if (!context)
			return -FI_EINVAL;

		fi_context = context;
		if (flags & FI_MULTI_RECV) {
			multi_req = calloc(1, sizeof(*multi_req));
			if (!multi_req)
				return -FI_ENOMEM;

			multi_req->src_addr = psm2_epaddr;
			multi_req->tag = psm2_tag;
			multi_req->tagsel = psm2_tagsel;
			multi_req->flag = recv_flag;
			multi_req->buf = buf;
			multi_req->len = len;
			multi_req->offset = 0;
			multi_req->min_buf_size = ep_priv->min_multi_recv;
			multi_req->context = fi_context;
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_MULTI_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = multi_req;
			/* The descriptor keeps the full length; the post is capped. */
			if (len > PSMX2_MAX_MSG_SIZE)
				len = PSMX2_MAX_MSG_SIZE;
		} else {
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = buf;
		}
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_SIZE(fi_context) = len;
	}

	err = psm2_mq_irecv2(ep_priv->domain->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, recv_flag, buf, len,
			     (void *)fi_context, &psm2_req);
	if (err != PSM2_OK) {
		/* Nothing was posted; free(NULL) covers the non-multi case. */
		free(multi_req);
		return psmx2_errno(err);
	}

	/* Only a caller-owned context records the PSM2 request handle. */
	if (fi_context == context)
		PSMX2_CTXT_REQ(fi_context) = psm2_req;

	return 0;
}
/*
 * Send an untagged (FI_MSG) message from a contiguous buffer.
 *
 * With FI_TRIGGER the arguments are recorded and attached to the trigger
 * counter. With FI_INJECT the message goes out through psm2_mq_send2() and
 * the counter/CQ are updated before returning. Otherwise the send is posted
 * with psm2_mq_isend2().
 *
 * Returns 0 on success, -FI_ENOMEM, -FI_EINVAL or -FI_EMSGSIZE for local
 * failures, or the value of psmx2_errno() for a PSM2 failure.
 */
ssize_t psmx2_send_generic(struct fid_ep *ep, const void *buf, size_t len,
			   void *desc, fi_addr_t dest_addr, void *context,
			   uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *av;
	struct psmx2_cq_event *event;
	struct fi_context *fi_context;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t tag32;
	uint8_t dst_vlane;
	size_t table_idx;
	int send_flag = 0;
	int no_completion;
	int err;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trig_ctxt = context;
		struct psmx2_trigger *trigger = calloc(1, sizeof(*trigger));

		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_SEND;
		trigger->cntr = container_of(trig_ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = trig_ctxt->trigger.threshold.threshold;
		trigger->send.ep = ep;
		trigger->send.buf = buf;
		trigger->send.len = len;
		trigger->send.desc = desc;
		trigger->send.dest_addr = dest_addr;
		trigger->send.context = context;
		trigger->send.flags = flags & ~FI_TRIGGER;
		trigger->send.data = data;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	/* Resolve the destination to a PSM2 epaddr plus destination vlane. */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		table_idx = (size_t)dest_addr;
		if (table_idx >= av->last)
			return -FI_EINVAL;

		psm2_epaddr = av->epaddrs[table_idx];
		dst_vlane = av->vlanes[table_idx];
	} else {
		psm2_epaddr = PSMX2_ADDR_TO_EP(dest_addr);
		dst_vlane = PSMX2_ADDR_TO_VL(dest_addr);
	}

	tag32 = PSMX2_TAG32(PSMX2_MSG_BIT, ep_priv->vlane, dst_vlane);
	if (flags & FI_REMOTE_CQ_DATA)
		tag32 |= PSMX2_IMM_BIT;

	PSMX2_SET_TAG(psm2_tag, data, tag32);

	no_completion = (flags & PSMX2_NO_COMPLETION) ||
			(ep_priv->send_selective_completion &&
			 !(flags & FI_COMPLETION));

	if (flags & FI_INJECT) {
		if (len > PSMX2_INJECT_SIZE)
			return -FI_EMSGSIZE;

		err = psm2_mq_send2(ep_priv->domain->psm2_mq, psm2_epaddr,
				    send_flag, &psm2_tag, buf, len);
		if (err != PSM2_OK)
			return psmx2_errno(err);

		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr);

		if (!ep_priv->send_cq || no_completion)
			return 0;

		event = psmx2_cq_create_event(ep_priv->send_cq,
					      context, (void *)buf, flags, len,
					      (uint64_t) data,
					      0 /* tag */,
					      0 /* olen */,
					      0 /* err */);
		if (!event)
			return -FI_ENOMEM;

		psmx2_cq_enqueue_event(ep_priv->send_cq, event);
		return 0;
	}

	if (!context) {
		/* A missing context is only acceptable for no-completion sends. */
		if (!no_completion)
			return -FI_EINVAL;

		fi_context = &ep_priv->nocomp_send_context;
	} else {
		fi_context = context;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_SEND_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = (void *)buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
	}

	err = psm2_mq_isend2(ep_priv->domain->psm2_mq, psm2_epaddr, send_flag,
			     &psm2_tag, buf, len, (void *)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	/* The shared no-completion context never stores a request handle. */
	if (fi_context == context)
		PSMX2_CTXT_REQ(fi_context) = psm2_req;

	return 0;
}
/*
 * Post a receive for an untagged (FI_MSG) message on the endpoint's rx
 * context.
 *
 * ep       - endpoint the receive is posted on
 * buf, len - destination buffer and its size
 * desc     - only forwarded to psmx2_trigger_queue_recv()
 * src_addr - translated through the AV when the endpoint has
 *            FI_DIRECTED_RECV and the value is not FI_ADDR_UNSPEC
 * context  - caller's context; asserted non-NULL when a completion is wanted
 * flags    - FI_TRIGGER, FI_COMPLETION and FI_MULTI_RECV are examined
 *
 * Returns 0 on success, -FI_ENOMEM when the multi-recv record cannot be
 * allocated, the result of psmx2_trigger_queue_recv() for triggered
 * operations, or the value of psmx2_errno() when psm2_mq_irecv2() fails.
 */
ssize_t psmx2_recv_generic(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, void *context,
			   uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	struct fi_context *fi_context;
	struct psmx2_multi_recv *multi_req = NULL;
	int recv_flag = 0;
	int err;
	int enable_completion;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_recv(ep, buf, len, desc, src_addr,
						context, flags);

	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		assert(av);
		psm2_epaddr = psmx2_av_translate_addr(av, ep_priv->rx, src_addr,
						      av->type);
	} else {
		psm2_epaddr = 0;
	}

	PSMX2_SET_TAG(psm2_tag, 0ULL, 0, PSMX2_TYPE_MSG);
	PSMX2_SET_MASK(psm2_tagsel, PSMX2_MATCH_NONE, PSMX2_TYPE_MASK);

	enable_completion = !ep_priv->recv_selective_completion ||
			    (flags & FI_COMPLETION);
	if (enable_completion) {
		assert(context);
		fi_context = context;
		if (flags & FI_MULTI_RECV) {
			multi_req = calloc(1, sizeof(*multi_req));
			if (!multi_req)
				return -FI_ENOMEM;

			multi_req->src_addr = psm2_epaddr;
			multi_req->tag = psm2_tag;
			multi_req->tagsel = psm2_tagsel;
			multi_req->flag = recv_flag;
			multi_req->buf = buf;
			multi_req->len = len;
			multi_req->offset = 0;
			multi_req->min_buf_size = ep_priv->min_multi_recv;
			multi_req->context = fi_context;
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_MULTI_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = multi_req;
			/* A single posted receive is capped at PSMX2_MAX_MSG_SIZE. */
			if (len > PSMX2_MAX_MSG_SIZE)
				len = PSMX2_MAX_MSG_SIZE;
		} else {
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = buf;
		}
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_SIZE(fi_context) = len;
	} else {
		PSMX2_EP_GET_OP_CONTEXT(ep_priv, fi_context);
#if !PSMX2_USE_REQ_CONTEXT
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
#endif
	}

	err = psm2_mq_irecv2(ep_priv->rx->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, recv_flag, buf, len,
			     (void *)fi_context, &psm2_req);
	if (OFI_UNLIKELY(err != PSM2_OK)) {
		/*
		 * Nothing was posted, so the multi-recv bookkeeping record can
		 * never be reached through a completion; release it here.
		 * free(NULL) is a no-op for the other paths.
		 */
		free(multi_req);
		return psmx2_errno(err);
	}

	if (enable_completion) {
		PSMX2_CTXT_REQ(fi_context) = psm2_req;
	} else {
#if PSMX2_USE_REQ_CONTEXT
		/* The context is taken from the request once it exists. */
		PSMX2_REQ_GET_OP_CONTEXT(psm2_req, fi_context);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
#endif
	}

	return 0;
}
/*
 * Post a tagged receive, or act on a previously peeked message.
 *
 * flags handling, in order:
 *   FI_PEEK    - delegated to psmx2_tagged_peek_generic()
 *   FI_TRIGGER - arguments are recorded and attached to the trigger counter
 *   FI_CLAIM   - the request stored in 'context' is received with
 *                psm2_mq_imrecv(); with FI_DISCARD the data is dropped and
 *                the completion/counter are reported immediately
 *   otherwise  - a new receive is posted with psm2_mq_irecv2()
 *
 * Returns 0 on success, -FI_ENOMEM or -FI_EINVAL for local failures, or the
 * value of psmx2_errno() for a PSM2 failure.
 */
ssize_t psmx2_tagged_recv_generic(struct fid_ep *ep, void *buf,
				  size_t len, void *desc,
				  fi_addr_t src_addr,
				  uint64_t tag, uint64_t ignore,
				  void *context, uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	uint32_t tag32, tagsel32;
	struct fi_context *fi_context;
	size_t idx;
	int no_completion;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_PEEK)
		return psmx2_tagged_peek_generic(ep, buf, len, desc,
						 src_addr, tag, ignore,
						 context, flags);

	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_TRECV;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->trecv.ep = ep;
		trigger->trecv.buf = buf;
		trigger->trecv.len = len;
		trigger->trecv.desc = desc;
		trigger->trecv.src_addr = src_addr;
		trigger->trecv.tag = tag;
		trigger->trecv.ignore = ignore;
		trigger->trecv.context = context;
		trigger->trecv.flags = flags & ~FI_TRIGGER;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (flags & FI_CLAIM) {
		if (!context)
			return -FI_EINVAL;

		if (flags & FI_DISCARD) {
			psm2_mq_status2_t psm2_status;
			struct psmx2_cq_event *event;

			/* Receive into a zero-length buffer and wait for it. */
			fi_context = context;
			psm2_req = PSMX2_CTXT_REQ(fi_context);
			err = psm2_mq_imrecv(ep_priv->trx_ctxt->psm2_mq, 0,
					     NULL, 0, context, &psm2_req);
			if (err != PSM2_OK)
				return psmx2_errno(err);

			psm2_mq_wait2(&psm2_req, &psm2_status);

			if (ep_priv->recv_cq &&
			    (!ep_priv->recv_selective_completion ||
			     (flags & FI_COMPLETION))) {
				tag = PSMX2_GET_TAG64(psm2_status.msg_tag);
				event = psmx2_cq_create_event(
						ep_priv->recv_cq,
						context,		/* op_context */
						NULL,			/* buf */
						flags|FI_RECV|FI_TAGGED,/* flags */
						0,			/* len */
						0,			/* data */
						tag,			/* tag */
						0,			/* olen */
						0);			/* err */
				if (!event)
					return -FI_ENOMEM;

				vlane = PSMX2_TAG32_GET_SRC(psm2_status.msg_tag.tag2);
				event->source_is_valid = 1;
				event->source = PSMX2_EP_TO_ADDR(psm2_status.msg_peer,
								 vlane);
				event->source_av = ep_priv->av;
				psmx2_cq_enqueue_event(ep_priv->recv_cq, event);
			}

			if (ep_priv->recv_cntr)
				psmx2_cntr_inc(ep_priv->recv_cntr);

			return 0;
		}

		fi_context = context;
		psm2_req = PSMX2_CTXT_REQ(fi_context);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_TRECV_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;

		err = psm2_mq_imrecv(ep_priv->trx_ctxt->psm2_mq, 0,
				     buf, len, context, &psm2_req);
		if (err != PSM2_OK)
			return psmx2_errno(err);

		PSMX2_CTXT_REQ(fi_context) = psm2_req;
		return 0;
	}

	no_completion = ep_priv->recv_selective_completion &&
			!(flags & FI_COMPLETION);

	/* A caller context is mandatory whenever a completion is generated. */
	if (!no_completion && !context)
		return -FI_EINVAL;

	/*
	 * Resolve the source before acquiring a provider-owned op context, so
	 * that the -FI_EINVAL return below cannot strand one.
	 */
	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		if (av && PSMX2_SEP_ADDR_TEST(src_addr)) {
			psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->trx_ctxt,
							     src_addr);
			vlane = 0;
		} else if (av && av->type == FI_AV_TABLE) {
			idx = (size_t)src_addr;
			if (idx >= av->last)
				return -FI_EINVAL;

			psm2_epaddr = av->epaddrs[idx];
			vlane = av->vlanes[idx];
		} else {
			psm2_epaddr = PSMX2_ADDR_TO_EP(src_addr);
			vlane = PSMX2_ADDR_TO_VL(src_addr);
		}
		tag32 = PSMX2_TAG32(0, vlane, ep_priv->vlane);
		tagsel32 = ~PSMX2_IOV_BIT;
	} else {
		/* No specific source: the source bits are left out of the mask. */
		psm2_epaddr = 0;
		tag32 = PSMX2_TAG32(0, 0, ep_priv->vlane);
		tagsel32 = ~(PSMX2_IOV_BIT | PSMX2_SRC_BITS);
	}

	if (no_completion) {
		fi_context = psmx2_ep_get_op_context(ep_priv);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT_ALLOC;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
	} else {
		fi_context = context;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_TRECV_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_SIZE(fi_context) = len;
	}

	PSMX2_SET_TAG(psm2_tag, tag, tag32);
	PSMX2_SET_TAG(psm2_tagsel, ~ignore, tagsel32);

	/*
	 * NOTE(review): on failure below a provider-owned op context is still
	 * not handed back; doing so needs a release helper that is not visible
	 * from this function.
	 */
	err = psm2_mq_irecv2(ep_priv->trx_ctxt->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, 0, buf, len,
			     (void *)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	if (fi_context == context)
		PSMX2_CTXT_REQ(fi_context) = psm2_req;

	return 0;
}
/*
 * Handle the arrival of an IOV header message: copy the header out of the
 * receive buffer and post one receive per announced segment, all sharing a
 * newly allocated struct psmx2_sendv_reply as their context.
 *
 * ep         - endpoint the header was received on
 * status     - completion status of the header receive; its error field is
 *              overwritten when this function fails
 * multi_recv - non-zero when the header landed in a multi-recv buffer, in
 *              which case the user context holds a struct psmx2_multi_recv
 *
 * Returns 0 on success, -FI_ENOMEM when the reply record cannot be allocated,
 * or the value of psmx2_errno() for a PSM2 failure.
 */
int psmx2_handle_sendv_req(struct psmx2_fid_ep *ep,
			   PSMX2_STATUS_TYPE *status,
			   int multi_recv)
{
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	struct psmx2_sendv_reply *rep;
	struct psmx2_multi_recv *recv_req = NULL;
	struct fi_context *fi_context;
	struct fi_context *recv_context;
	int i, err;
	uint8_t *recv_buf;
	size_t recv_len, len;
	size_t hdr_len;

	if (PSMX2_STATUS_ERROR(status) != PSM2_OK)
		return psmx2_errno(PSMX2_STATUS_ERROR(status));

	rep = malloc(sizeof(*rep));
	if (!rep) {
		PSMX2_STATUS_ERROR(status) = PSM2_NO_MEMORY;
		return -FI_ENOMEM;
	}

	recv_context = PSMX2_STATUS_CONTEXT(status);
	if (multi_recv) {
		recv_req = PSMX2_CTXT_USER(recv_context);
		recv_buf = recv_req->buf + recv_req->offset;
		recv_len = recv_req->len - recv_req->offset;
		rep->multi_recv = 1;
	} else {
		recv_buf = PSMX2_CTXT_USER(recv_context);
		recv_len = PSMX2_CTXT_SIZE(recv_context);
		rep->multi_recv = 0;
	}

	/*
	 * The received length comes from the wire; never copy more than the
	 * header structure can hold.
	 */
	hdr_len = PSMX2_STATUS_RCVLEN(status);
	if (hdr_len > sizeof(rep->iov_info))
		hdr_len = sizeof(rep->iov_info);
	memcpy(&rep->iov_info, recv_buf, hdr_len);

	rep->user_context = PSMX2_STATUS_CONTEXT(status);
	rep->tag = PSMX2_STATUS_TAG(status);
	rep->buf = recv_buf;
	rep->no_completion = 0;
	rep->iov_done = 0;
	rep->bytes_received = 0;
	rep->msg_length = 0;
	rep->error_code = PSM2_OK;

	fi_context = &rep->fi_context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_IOV_RECV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = rep;
	PSMX2_CTXT_EP(fi_context) = ep;

	rep->comp_flag = PSMX2_IS_MSG(PSMX2_GET_FLAGS(rep->tag)) ? FI_MSG : FI_TAGGED;
	if (PSMX2_HAS_IMM(PSMX2_GET_FLAGS(rep->tag)))
		rep->comp_flag |= FI_REMOTE_CQ_DATA;

	/* IOV payload uses a sequence number in place of a tag. */
	PSMX2_SET_TAG(psm2_tag, rep->iov_info.seq_num, 0, PSMX2_TYPE_IOV_PAYLOAD);
	PSMX2_SET_MASK(psm2_tagsel, PSMX2_MATCH_ALL, PSMX2_TYPE_MASK);

	for (i=0; i<rep->iov_info.count; i++) {
		if (recv_len) {
			len = MIN(recv_len, rep->iov_info.len[i]);
			err = psm2_mq_irecv2(ep->rx->psm2_mq,
					     PSMX2_STATUS_PEER(status),
					     &psm2_tag, &psm2_tagsel,
					     0/*flag*/, recv_buf, len,
					     (void *)fi_context, &psm2_req);
		} else {
			/* recv buffer full, post empty recvs */
			len = 0;
			err = psm2_mq_irecv2(ep->rx->psm2_mq,
					     PSMX2_STATUS_PEER(status),
					     &psm2_tag, &psm2_tagsel,
					     0/*flag*/, NULL, 0,
					     (void *)fi_context, &psm2_req);
		}

		if (err) {
			PSMX2_STATUS_ERROR(status) = err;
			/*
			 * The reply record may only be released while no posted
			 * receive refers to it, i.e. when the first post failed.
			 */
			if (i == 0)
				free(rep);
			return psmx2_errno(err);
		}

		recv_buf += len;
		recv_len -= len;
	}

	/* Remaining space below the multi-recv threshold: flag the buffer. */
	if (multi_recv && recv_len < recv_req->min_buf_size)
		rep->comp_flag |= FI_MULTI_RECV;

	return 0;
}
/*
 * RMA read into a scatter list.
 *
 * Triggered reads are queued via psmx2_trigger_queue_readv() and reads whose
 * target is this process go through psmx2_rma_self(). For a remote target a
 * request record is allocated, the iov array is copied into it, and the data
 * is requested with active messages: fixed-size "short" requests for the
 * leading bytes and, when psmx2_env.tagged_rma is set and the last non-empty
 * segment exceeds the short reply size, a tagged receive plus a "long"
 * request for that final segment.
 *
 * Returns 0 on success, -FI_EINVAL for a bad source address, -FI_ENOMEM when
 * an allocation fails, or the result of the delegated helper.
 */
ssize_t psmx2_readv_generic(struct fid_ep *ep, const struct iovec *iov,
			    void *desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context,
			    uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *av;
	struct psmx2_epaddr_context *peer_ctxt;
	struct psmx2_am_request *req;
	psm2_amarg_t args[8];
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	void *tail_buf = NULL;
	size_t tail_len = 0;
	size_t head_len;
	size_t bytes_total = 0;
	size_t reply_off = 0;
	size_t table_idx;
	int max_short;
	int k;
	int err;

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_readv(ep, iov, desc, count, src_addr,
						 addr, key, context, flags);

	av = ep_priv->av;
	if (av && PSMX2_SEP_ADDR_TEST(src_addr)) {
		psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->tx, src_addr);
	} else if (av && av->type == FI_AV_TABLE) {
		table_idx = src_addr;
		err = psmx2_av_check_table_idx(av, ep_priv->tx, table_idx);
		if (err)
			return err;

		psm2_epaddr = av->tables[ep_priv->tx->id].epaddrs[table_idx];
	} else {
		if (!src_addr)
			return -FI_EINVAL;

		psm2_epaddr = PSMX2_ADDR_TO_EP(src_addr);
	}

	/* Reading from ourselves never touches the wire. */
	peer_ctxt = psm2_epaddr_getctxt((void *)psm2_epaddr);
	if (peer_ctxt->epid == ep_priv->tx->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_READV, ep_priv,
				      (void *)iov, count, desc, addr,
				      key, context, flags, 0);

	for (k = 0; k < count; k++)
		bytes_total += iov[k].iov_len;

	req = psmx2_am_request_alloc(ep_priv->tx);
	if (!req)
		return -FI_ENOMEM;

	/* Keep a private copy of the iov array inside the request. */
	req->tmpbuf = malloc(count * sizeof(struct iovec));
	if (!req->tmpbuf) {
		psmx2_am_request_free(ep_priv->tx, req);
		return -FI_ENOMEM;
	}

	req->iov = req->tmpbuf;
	memcpy(req->iov, iov, count * sizeof(struct iovec));

	req->op = PSMX2_AM_REQ_READV;
	req->read.iov_count = count;
	req->read.len = bytes_total;
	req->read.addr = addr;	/* needed? */
	req->read.key = key;	/* needed? */
	req->read.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_READ | FI_RMA;
	PSMX2_CTXT_USER(&req->fi_context) = context;
	PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

	if (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)) {
		PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_NOCOMP_READ_CONTEXT;
		req->no_event = 1;
	} else {
		PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_READ_CONTEXT;
	}

	max_short = ep_priv->tx->psm2_am_param.max_reply_short;

	if (psmx2_env.tagged_rma) {
		/* Find the last non-empty segment; it is "long" if oversized. */
		k = count - 1;
		while (k >= 0 && !iov[k].iov_len)
			k--;

		if (k >= 0 && iov[k].iov_len > max_short) {
			tail_buf = iov[k].iov_base;
			tail_len = iov[k].iov_len;
		}
	}

	/* Use short protocol for all but the last segment (tail_len) */
	head_len = bytes_total - tail_len;

	args[0].u32w0 = 0;
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ);
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[3].u64 = key;

	for (; head_len > max_short; head_len -= max_short) {
		args[0].u32w1 = max_short;
		args[2].u64 = addr;
		args[4].u64 = reply_off;
		psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
				      args, 5, NULL, 0, 0, NULL, NULL);
		addr += max_short;
		reply_off += max_short;
	}

	/* The final short request carries EOM unless a long segment follows. */
	if (!tail_len)
		PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);

	args[0].u32w1 = head_len;
	args[2].u64 = addr;
	args[4].u64 = reply_off;
	psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
			      args, 5, NULL, 0, 0, NULL, NULL);

	if (!tail_len)
		return 0;

	/* Use the long protocol for the last segment */
	PSMX2_SET_TAG(psm2_tag, (uint64_t)req, 0, PSMX2_RMA_TYPE_READ);
	PSMX2_SET_MASK(psm2_tagsel, PSMX2_MATCH_ALL, PSMX2_RMA_TYPE_MASK);
	psm2_mq_irecv2(ep_priv->tx->psm2_mq, psm2_epaddr,
		       &psm2_tag, &psm2_tagsel, 0,
		       tail_buf, tail_len,
		       (void *)&req->fi_context, &psm2_req);

	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ_LONG);
	args[0].u32w1 = tail_len;
	args[1].u64 = (uint64_t)req;
	args[2].u64 = addr + head_len;
	args[3].u64 = key;
	psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
			      args, 4, NULL, 0, 0, NULL, NULL);

	return 0;
}
int psmx2_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { struct psmx2_fid_domain *domain_priv; struct psmx2_fid_ep *ep_priv; struct psmx2_context *item; uint8_t vlane; uint64_t ep_cap; int err = -FI_EINVAL; int i; if (info) ep_cap = info->caps; else ep_cap = FI_TAGGED; domain_priv = container_of(domain, struct psmx2_fid_domain, domain.fid); if (!domain_priv) goto errout; err = psmx2_domain_check_features(domain_priv, ep_cap); if (err) goto errout; err = psmx2_alloc_vlane(domain_priv, &vlane); if (err) goto errout; ep_priv = (struct psmx2_fid_ep *) calloc(1, sizeof *ep_priv); if (!ep_priv) { err = -FI_ENOMEM; goto errout_free_vlane; } ep_priv->ep.fid.fclass = FI_CLASS_EP; ep_priv->ep.fid.context = context; ep_priv->ep.fid.ops = &psmx2_fi_ops; ep_priv->ep.ops = &psmx2_ep_ops; ep_priv->ep.cm = &psmx2_cm_ops; ep_priv->domain = domain_priv; ep_priv->vlane = vlane; PSMX2_CTXT_TYPE(&ep_priv->nocomp_send_context) = PSMX2_NOCOMP_SEND_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv; PSMX2_CTXT_TYPE(&ep_priv->nocomp_recv_context) = PSMX2_NOCOMP_RECV_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_recv_context) = ep_priv; if (ep_cap & FI_TAGGED) ep_priv->ep.tagged = &psmx2_tagged_ops; if (ep_cap & FI_MSG) ep_priv->ep.msg = &psmx2_msg_ops; if (ep_cap & FI_RMA) ep_priv->ep.rma = &psmx2_rma_ops; if (ep_cap & FI_ATOMICS) ep_priv->ep.atomic = &psmx2_atomic_ops; ep_priv->caps = ep_cap; err = psmx2_domain_enable_ep(domain_priv, ep_priv); if (err) goto errout_free_ep; psmx2_domain_acquire(domain_priv); domain_priv->eps[ep_priv->vlane] = ep_priv; if (info) { if (info->tx_attr) ep_priv->flags = info->tx_attr->op_flags; if (info->rx_attr) ep_priv->flags |= info->rx_attr->op_flags; } psmx2_ep_optimize_ops(ep_priv); slist_init(&ep_priv->free_context_list); fastlock_init(&ep_priv->context_lock); #define PSMX2_FREE_CONTEXT_LIST_SIZE 64 for (i=0; i<PSMX2_FREE_CONTEXT_LIST_SIZE; i++) { item = calloc(1, sizeof(*item)); if (!item) { 
FI_WARN(&psmx2_prov, FI_LOG_EP_CTRL, "out of memory.\n"); exit(-1); } slist_insert_tail(&item->list_entry, &ep_priv->free_context_list); } *ep = &ep_priv->ep; return 0; errout_free_ep: free(ep_priv); errout_free_vlane: psmx2_free_vlane(domain_priv, vlane); errout: return err; }
/*
 * RMA write from a contiguous buffer.
 *
 * Triggered writes are queued via psmx2_trigger_queue_write() and writes
 * whose target is this process go through psmx2_rma_self(). For a remote
 * target a request record is allocated; with FI_INJECT the data is first
 * copied into a temporary buffer owned by the request. The payload is then
 * either announced with a "long" active message and sent with
 * psm2_mq_isend2() (psmx2_env.tagged_rma set and len above the short request
 * size), or split into short active messages of at most that size.
 *
 * Returns 0 on success, -FI_EINVAL for a NULL buffer or bad destination,
 * -FI_EMSGSIZE for an oversized inject, -FI_ENOMEM when an allocation fails,
 * or the result of the delegated helper.
 */
ssize_t psmx2_write_generic(struct fid_ep *ep, const void *buf, size_t len,
			    void *desc, fi_addr_t dest_addr,
			    uint64_t addr, uint64_t key, void *context,
			    uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *av;
	struct psmx2_epaddr_context *peer_ctxt;
	struct psmx2_am_request *req;
	psm2_amarg_t args[8];
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	void *send_ctxt;
	size_t table_idx;
	int am_flags = PSM2_AM_FLAG_ASYNC;
	int max_short;
	int nargs;
	int no_event;
	int err;

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_write(ep, buf, len, desc, dest_addr,
						 addr, key, context, flags,
						 data);

	if (!buf)
		return -FI_EINVAL;

	av = ep_priv->av;
	if (av && PSMX2_SEP_ADDR_TEST(dest_addr)) {
		psm2_epaddr = psmx2_av_translate_sep(av, ep_priv->tx, dest_addr);
	} else if (av && av->type == FI_AV_TABLE) {
		table_idx = dest_addr;
		err = psmx2_av_check_table_idx(av, ep_priv->tx, table_idx);
		if (err)
			return err;

		psm2_epaddr = av->tables[ep_priv->tx->id].epaddrs[table_idx];
	} else {
		if (!dest_addr)
			return -FI_EINVAL;

		psm2_epaddr = PSMX2_ADDR_TO_EP(dest_addr);
	}

	/* Writing to ourselves never touches the wire. */
	peer_ctxt = psm2_epaddr_getctxt((void *)psm2_epaddr);
	if (peer_ctxt->epid == ep_priv->tx->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_WRITE, ep_priv,
				      (void *)buf, len, desc, addr, key,
				      context, flags, data);

	no_event = (flags & PSMX2_NO_COMPLETION) ||
		   (ep_priv->send_selective_completion && !(flags & FI_COMPLETION));

	req = psmx2_am_request_alloc(ep_priv->tx);
	if (!req)
		return -FI_ENOMEM;

	if (!(flags & FI_INJECT)) {
		PSMX2_CTXT_TYPE(&req->fi_context) = no_event ?
						    PSMX2_NOCOMP_WRITE_CONTEXT :
						    PSMX2_WRITE_CONTEXT;
	} else {
		if (len > psmx2_env.inject_size) {
			psmx2_am_request_free(ep_priv->tx, req);
			return -FI_EMSGSIZE;
		}

		req->tmpbuf = malloc(len);
		if (!req->tmpbuf) {
			psmx2_am_request_free(ep_priv->tx, req);
			return -FI_ENOMEM;
		}

		/* From here on the private copy is what gets transmitted. */
		memcpy(req->tmpbuf, (void *)buf, len);
		buf = req->tmpbuf;
	}

	req->no_event = no_event;
	req->op = PSMX2_AM_REQ_WRITE;
	req->write.buf = (void *)buf;
	req->write.len = len;
	req->write.addr = addr;	/* needed? */
	req->write.key = key;	/* needed? */
	req->write.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_WRITE | FI_RMA;
	PSMX2_CTXT_USER(&req->fi_context) = context;
	PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

	max_short = ep_priv->tx->psm2_am_param.max_request_short;

	args[0].u32w0 = 0;

	if (psmx2_env.tagged_rma && len > max_short) {
		/* Long protocol: announce via AM, move the data via the MQ. */
		PSMX2_SET_TAG(psm2_tag, (uint64_t)req, 0, PSMX2_RMA_TYPE_WRITE);
		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE_LONG);
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		nargs = 4;
		if (flags & FI_REMOTE_CQ_DATA) {
			PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA);
			args[4].u64 = data;
			nargs++;
		}

		if (flags & FI_DELIVERY_COMPLETE) {
			args[0].u32w0 |= PSMX2_AM_FORCE_ACK;
			send_ctxt = NULL;
		} else {
			send_ctxt = (void *)&req->fi_context;
		}

		psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER, args,
				      nargs, NULL, 0, am_flags, NULL, NULL);

		psm2_mq_isend2(ep_priv->tx->psm2_mq, psm2_epaddr, 0,
			       &psm2_tag, buf, len, send_ctxt, &psm2_req);

		return 0;
	}

	/* Short protocol: the request handle and key are the same per chunk. */
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE);
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[3].u64 = key;
	nargs = 4;

	for (; len > max_short; len -= max_short) {
		args[0].u32w1 = max_short;
		args[2].u64 = addr;
		psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER, args,
				      nargs, (void *)buf, max_short, am_flags,
				      NULL, NULL);
		buf = (const uint8_t *)buf + max_short;
		addr += max_short;
	}

	/* Final (possibly only) chunk carries EOM and the optional CQ data. */
	args[0].u32w1 = len;
	args[2].u64 = addr;
	if (flags & FI_REMOTE_CQ_DATA) {
		PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA | PSMX2_AM_EOM);
		args[4].u64 = data;
		nargs++;
	} else {
		PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
	}

	psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER, args, nargs,
			      (void *)buf, len, am_flags, NULL, NULL);

	return 0;
}
/*
 * Post a tagged receive, or claim a previously peeked message.
 *
 * flags handling, in order:
 *   FI_PEEK    - delegated to psmx2_tagged_peek_generic()
 *   FI_TRIGGER - arguments are recorded and attached to the trigger counter
 *   FI_CLAIM   - the request stored in 'context' is received with
 *                psm2_mq_imrecv()
 *   otherwise  - a new receive is posted with psm2_mq_irecv2()
 *
 * Returns 0 on success, -FI_ENOMEM or -FI_EINVAL for local failures, or the
 * value of psmx2_errno() for a PSM2 failure.
 */
ssize_t psmx2_tagged_recv_generic(struct fid_ep *ep, void *buf,
				  size_t len, void *desc,
				  fi_addr_t src_addr,
				  uint64_t tag, uint64_t ignore,
				  void *context, uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	uint32_t tag32, tagsel32;
	struct fi_context *fi_context;
	size_t idx;
	int no_completion;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_PEEK)
		return psmx2_tagged_peek_generic(ep, buf, len, desc,
						 src_addr, tag, ignore,
						 context, flags);

	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_TRECV;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->trecv.ep = ep;
		trigger->trecv.buf = buf;
		trigger->trecv.len = len;
		trigger->trecv.desc = desc;
		trigger->trecv.src_addr = src_addr;
		trigger->trecv.tag = tag;
		trigger->trecv.ignore = ignore;
		trigger->trecv.context = context;
		trigger->trecv.flags = flags & ~FI_TRIGGER;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (flags & FI_CLAIM) {
		if (!context)
			return -FI_EINVAL;

		/* TODO: handle FI_DISCARD */

		fi_context = context;
		psm2_req = PSMX2_CTXT_REQ(fi_context);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_TRECV_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;

		err = psm2_mq_imrecv(ep_priv->domain->psm2_mq, 0,
				     buf, len, context, &psm2_req);
		if (err != PSM2_OK)
			return psmx2_errno(err);

		PSMX2_CTXT_REQ(fi_context) = psm2_req;
		return 0;
	}

	no_completion = ep_priv->recv_selective_completion &&
			!(flags & FI_COMPLETION);

	/* A caller context is mandatory whenever a completion is generated. */
	if (!no_completion && !context)
		return -FI_EINVAL;

	/*
	 * Resolve the source before acquiring a provider-owned op context, so
	 * that the -FI_EINVAL return below cannot strand one.
	 */
	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		if (av && av->type == FI_AV_TABLE) {
			idx = (size_t)src_addr;
			if (idx >= av->last)
				return -FI_EINVAL;

			psm2_epaddr = av->epaddrs[idx];
			vlane = av->vlanes[idx];
		} else {
			psm2_epaddr = PSMX2_ADDR_TO_EP(src_addr);
			vlane = PSMX2_ADDR_TO_VL(src_addr);
		}
		tag32 = PSMX2_TAG32(0, vlane, ep_priv->vlane);
		tagsel32 = ~PSMX2_IOV_BIT;
	} else {
		/* No specific source: the source bits are left out of the mask. */
		psm2_epaddr = 0;
		tag32 = PSMX2_TAG32(0, 0, ep_priv->vlane);
		tagsel32 = ~(PSMX2_IOV_BIT | PSMX2_SRC_BITS);
	}

	if (no_completion) {
		fi_context = psmx2_ep_get_op_context(ep_priv);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT_ALLOC;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
	} else {
		fi_context = context;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_TRECV_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_SIZE(fi_context) = len;
	}

	PSMX2_SET_TAG(psm2_tag, tag, tag32);
	PSMX2_SET_TAG(psm2_tagsel, ~ignore, tagsel32);

	/*
	 * NOTE(review): on failure below a provider-owned op context is still
	 * not handed back; doing so needs a release helper that is not visible
	 * from this function.
	 */
	err = psm2_mq_irecv2(ep_priv->domain->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, 0, buf, len,
			     (void *)fi_context, &psm2_req);
	if (err != PSM2_OK)
		return psmx2_errno(err);

	if (fi_context == context)
		PSMX2_CTXT_REQ(fi_context) = psm2_req;

	return 0;
}
/*
 * Send an untagged (FI_MSG) message gathered from an iov array.
 *
 * Two wire protocols are used:
 *   PACK  - total payload fits in PSMX2_IOV_BUF_SIZE; the segments are
 *           copied into the request buffer and sent as one message.
 *   MULTI - a header (sequence number, segment count, total length and the
 *           per-segment lengths) is sent first with PSMX2_IOV_BIT set,
 *           followed by one message per non-empty segment.
 *
 * With FI_TRIGGER the arguments are recorded and attached to the trigger
 * counter. With FI_INJECT only the PACK protocol is possible, because the
 * function returns before any per-segment send is posted.
 *
 * Returns 0 on success, -FI_ENOMEM, -FI_EINVAL or -FI_EMSGSIZE for local
 * failures, or the value of psmx2_errno() for a PSM2 failure.
 */
ssize_t psmx2_sendv_generic(struct fid_ep *ep, const struct iovec *iov,
			    void *desc, size_t count, fi_addr_t dest_addr,
			    void *context, uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t tag32, tag32_base;
	struct fi_context * fi_context;
	int send_flag = 0;
	int err;
	size_t idx;
	int no_completion = 0;
	struct psmx2_cq_event *event;
	size_t real_count;
	size_t len, total_len;
	char *p;
	uint32_t *q;
	int i;
	struct psmx2_sendv_request *req;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_SENDV;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->sendv.ep = ep;
		trigger->sendv.iov = iov;
		trigger->sendv.desc = desc;
		trigger->sendv.count = count;
		trigger->sendv.dest_addr = dest_addr;
		trigger->sendv.context = context;
		trigger->sendv.flags = flags & ~FI_TRIGGER;
		trigger->sendv.data = data;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	/* Empty segments are skipped everywhere below. */
	total_len = 0;
	real_count = 0;
	for (i=0; i<count; i++) {
		if (iov[i].iov_len) {
			total_len += iov[i].iov_len;
			real_count++;
		}
	}

	req = malloc(sizeof(*req));
	if (!req)
		return -FI_ENOMEM;

	if (total_len <= PSMX2_IOV_BUF_SIZE) {
		req->iov_protocol = PSMX2_IOV_PROTO_PACK;
		p = req->buf;
		for (i=0; i<count; i++) {
			if (iov[i].iov_len) {
				memcpy(p, iov[i].iov_base, iov[i].iov_len);
				p += iov[i].iov_len;
			}
		}

		tag32_base = PSMX2_MSG_BIT;
		len = total_len;
	} else {
		/*
		 * NOTE(review): nothing here bounds real_count against the
		 * capacity of iov_info.len; presumably callers enforce the
		 * iov limit. Confirm.
		 */
		req->iov_protocol = PSMX2_IOV_PROTO_MULTI;
		req->iov_done = 0;
		req->iov_info.seq_num = (++ep_priv->iov_seq_num) %
					PSMX2_IOV_MAX_SEQ_NUM + 1;
		req->iov_info.count = (uint32_t)real_count;
		req->iov_info.total_len = (uint32_t)total_len;

		q = req->iov_info.len;
		for (i=0; i<count; i++) {
			if (iov[i].iov_len)
				*q++ = (uint32_t)iov[i].iov_len;
		}

		tag32_base = PSMX2_MSG_BIT | PSMX2_IOV_BIT;
		/* header: seq_num, count, total_len + one length per segment */
		len = (3 + real_count) * sizeof(uint32_t);
	}

	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = (size_t)dest_addr;
		if (idx >= av->last) {
			free(req);
			return -FI_EINVAL;
		}

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
	} else {
		psm2_epaddr = PSMX2_ADDR_TO_EP(dest_addr);
		vlane = PSMX2_ADDR_TO_VL(dest_addr);
	}

	tag32 = PSMX2_TAG32(tag32_base, ep_priv->vlane, vlane);
	if (flags & FI_REMOTE_CQ_DATA)
		tag32 |= PSMX2_IMM_BIT;

	PSMX2_SET_TAG(psm2_tag, data, tag32);

	if ((flags & PSMX2_NO_COMPLETION) ||
	    (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)))
		no_completion = 1;

	if (flags & FI_INJECT) {
		/*
		 * Inject only works with the packed copy. Under the MULTI
		 * protocol 'len' is the header size, so the size check alone
		 * would pass, the header would go out, and the payload
		 * segments would never follow.
		 */
		if (req->iov_protocol == PSMX2_IOV_PROTO_MULTI ||
		    len > PSMX2_INJECT_SIZE) {
			free(req);
			return -FI_EMSGSIZE;
		}

		err = psm2_mq_send2(ep_priv->domain->psm2_mq, psm2_epaddr,
				    send_flag, &psm2_tag, req->buf, len);

		free(req);

		if (err != PSM2_OK)
			return psmx2_errno(err);

		if (ep_priv->send_cntr)
			psmx2_cntr_inc(ep_priv->send_cntr);

		if (ep_priv->send_cq && !no_completion) {
			event = psmx2_cq_create_event(
					ep_priv->send_cq,
					context, NULL, flags, len,
					(uint64_t) data,
					0 /* tag */,
					0 /* olen */,
					0 /* err */);

			if (event)
				psmx2_cq_enqueue_event(ep_priv->send_cq, event);
			else
				return -FI_ENOMEM;
		}

		return 0;
	}

	req->no_completion = no_completion;
	req->user_context = context;
	req->comp_flag = FI_MSG;

	fi_context = &req->fi_context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_SENDV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = req;
	PSMX2_CTXT_EP(fi_context) = ep_priv;

	/* Packed payload, or the IOV header when using the MULTI protocol. */
	err = psm2_mq_isend2(ep_priv->domain->psm2_mq, psm2_epaddr,
			     send_flag, &psm2_tag, req->buf, len,
			     (void *)fi_context, &psm2_req);

	if (err != PSM2_OK) {
		free(req);
		return psmx2_errno(err);
	}

	PSMX2_CTXT_REQ(fi_context) = psm2_req;

	if (req->iov_protocol == PSMX2_IOV_PROTO_MULTI) {
		fi_context = &req->fi_context_iov;
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_IOV_SEND_CONTEXT;
		PSMX2_CTXT_USER(fi_context) = req;
		PSMX2_CTXT_EP(fi_context) = ep_priv;

		/* Payload messages carry the sequence number, not the IOV bit. */
		tag32 &= ~PSMX2_IOV_BIT;
		PSMX2_TAG32_SET_SEQ(tag32, req->iov_info.seq_num);
		PSMX2_SET_TAG(psm2_tag, data, tag32);

		for (i=0; i<count; i++) {
			if (iov[i].iov_len) {
				/*
				 * req is not freed on failure here: the header
				 * send posted above still refers to it.
				 */
				err = psm2_mq_isend2(ep_priv->domain->psm2_mq,
						     psm2_epaddr, send_flag,
						     &psm2_tag,
						     iov[i].iov_base,
						     iov[i].iov_len,
						     (void *)fi_context,
						     &psm2_req);
				if (err != PSM2_OK)
					return psmx2_errno(err);
			}
		}
	}

	return 0;
}
/*
 * Vectored RMA write: send the bytes described by iov[0..count-1] to the
 * remote region identified by (addr, key) on the peer dest_addr.
 *
 * Paths, in the order they are tried:
 *  - FI_TRIGGER set: record all arguments (with FI_TRIGGER cleared) in a
 *    psmx2_trigger attached to the counter named by the
 *    fi_triggered_context passed as `context`, and return 0.
 *  - The resolved peer has the same epid as this domain: hand the whole
 *    operation to psmx2_rma_self() and return its result.
 *  - Case 1: the summed iov length fits in
 *    psmx2_am_param.max_request_short: the segments are packed into a
 *    buffer allocated together with the request and sent in one AM.
 *  - Case 2: each non-empty segment is sent in chunks of at most
 *    chunk_size bytes. If psmx2_env.tagged_rma is set and the last
 *    segment is larger than chunk_size, that segment is sent with a
 *    PSMX2_AM_REQ_WRITE_LONG request followed by psm2_mq_isend2().
 *
 * Returns 0 once the request(s) have been issued, -FI_ENOMEM on
 * allocation failure, -FI_EINVAL for an unusable dest_addr,
 * -FI_EMSGSIZE when FI_INJECT is requested for data that does not fit
 * in Case 1, or the value of psmx2_rma_self() for the loopback path.
 *
 * NOTE(review): the return values of psm2_am_request_short() and
 * psm2_mq_isend2() are not checked here, so a failed submission is still
 * reported as success. `req` is never freed in this function; presumably
 * the completion/reply path releases it -- TODO confirm.
 * NOTE(review): the triggered path stores the caller's iov pointer
 * rather than a copy -- confirm the caller keeps it alive until the
 * trigger fires.
 */
ssize_t psmx2_writev_generic(struct fid_ep *ep, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t dest_addr,
			     uint64_t addr, uint64_t key, void *context,
			     uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	struct psmx2_epaddr_context *epaddr_context;
	struct psmx2_am_request *req;
	psm2_amarg_t args[8];
	int nargs;
	int am_flags = PSM2_AM_FLAG_ASYNC;
	int chunk_size;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag;
	uint32_t tag32;
	size_t idx;
	void *psm2_context;
	int no_event;
	size_t total_len, len, len_sent;
	uint8_t *buf, *p;
	int i;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	/* Deferred execution: capture the arguments and queue them on the counter. */
	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_WRITEV;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->writev.ep = ep;
		trigger->writev.iov = iov;
		trigger->writev.count = count;
		trigger->writev.desc = desc;
		trigger->writev.dest_addr = dest_addr;
		trigger->writev.addr = addr;
		trigger->writev.key = key;
		trigger->writev.context = context;
		/* FI_TRIGGER is cleared so the replayed call takes the immediate path. */
		trigger->writev.flags = flags & ~FI_TRIGGER;
		trigger->writev.data = data;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	/*
	 * Resolve dest_addr: with an FI_AV_TABLE address vector it is an index
	 * into the AV arrays; otherwise the PSM2 epaddr and vlane are decoded
	 * from the fi_addr_t value itself.
	 */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
	} else {
		if (!dest_addr)
			return -FI_EINVAL;

		psm2_epaddr = PSMX2_ADDR_TO_EP(dest_addr);
		vlane = PSMX2_ADDR_TO_VL(dest_addr);
	}

	/* Target is an endpoint of this same domain: use the loopback helper. */
	epaddr_context = psm2_epaddr_getctxt((void *)psm2_epaddr);
	if (epaddr_context->epid == ep_priv->domain->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_WRITEV, ep_priv,
				      ep_priv->domain->eps[vlane], (void *)iov,
				      count, desc, addr, key, context, flags,
				      data);

	/*
	 * A completion event is suppressed either explicitly, or because the
	 * endpoint uses selective completion and FI_COMPLETION was not asked for.
	 */
	no_event = (flags & PSMX2_NO_COMPLETION) ||
		   (ep_priv->send_selective_completion && !(flags & FI_COMPLETION));

	total_len = 0;
	for (i=0; i<count; i++)
		total_len += iov[i].iov_len;

	chunk_size = psmx2_am_param.max_request_short;

	/* Case 1: fit into a AM message, then pack and send */
	if (total_len <= chunk_size) {
		/* The packed payload lives directly behind the request header. */
		req = malloc(sizeof(*req) + total_len);
		if (!req)
			return -FI_ENOMEM;

		/* Only the header is zeroed; the payload area is filled below. */
		memset(req, 0, sizeof(*req));
		p = (uint8_t *)req + sizeof(*req);
		for (i=0; i<count; i++) {
			if (iov[i].iov_len) {
				memcpy(p, iov[i].iov_base, iov[i].iov_len);
				p += iov[i].iov_len;
			}
		}
		buf = (uint8_t *)req + sizeof(*req);
		len = total_len;

		req->no_event = no_event;
		req->op = PSMX2_AM_REQ_WRITE;
		req->write.buf = (void *)buf;
		req->write.len = len;
		req->write.addr = addr; /* needed? */
		req->write.key = key; /* needed? */
		req->write.context = context;
		req->ep = ep_priv;
		req->cq_flags = FI_WRITE | FI_RMA;
		PSMX2_CTXT_USER(&req->fi_context) = context;
		PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

		/*
		 * AM arguments: [0] source/destination vlane, opcode and flags
		 * plus the payload length, [1] the request pointer, [2] remote
		 * address, [3] key, and optionally [4] remote CQ data.
		 */
		args[0].u32w0 = 0;
		PSMX2_AM_SET_SRC(args[0].u32w0, ep_priv->vlane);
		PSMX2_AM_SET_DST(args[0].u32w0, vlane);
		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE);
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		nargs = 4;
		if (flags & FI_REMOTE_CQ_DATA) {
			PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA | PSMX2_AM_EOM);
			args[4].u64 = data;
			nargs++;
		} else {
			PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
		}
		psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER, args,
				      nargs, (void *)buf, len, am_flags, NULL,
				      NULL);
		return 0;
	}

	/* Data that did not fit in Case 1 is rejected for FI_INJECT. */
	if (flags & FI_INJECT)
		return -FI_EMSGSIZE;

	req = calloc(1, sizeof(*req));
	if (!req)
		return -FI_ENOMEM;

	PSMX2_CTXT_TYPE(&req->fi_context) = no_event ?
					    PSMX2_NOCOMP_WRITE_CONTEXT :
					    PSMX2_WRITE_CONTEXT;

	req->no_event = no_event;
	req->op = PSMX2_AM_REQ_WRITE;
	/* Only the first segment's base is recorded, with the total length. */
	req->write.buf = (void *)iov[0].iov_base;
	req->write.len = total_len;
	req->write.addr = addr; /* needed? */
	req->write.key = key; /* needed? */
	req->write.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_WRITE | FI_RMA;
	PSMX2_CTXT_USER(&req->fi_context) = context;
	PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

	/* Case 2: send iov in sequence */
	args[0].u32w0 = 0;
	PSMX2_AM_SET_SRC(args[0].u32w0, ep_priv->vlane);
	PSMX2_AM_SET_DST(args[0].u32w0, vlane);

	/* `addr` advances with every byte issued; len_sent tracks the total so far. */
	len_sent = 0;
	for (i=0; i<count; i++) {
		if (!iov[i].iov_len)
			continue;

		/* Case 2.1: use long protocol for the last segment if it is large */
		if (psmx2_env.tagged_rma &&
		    iov[i].iov_len > chunk_size &&
		    len_sent + iov[i].iov_len == total_len) {
			/* The request pointer doubles as the tag of the MQ send below. */
			tag32 = PSMX2_TAG32(PSMX2_RMA_BIT, ep_priv->vlane, vlane);
			PSMX2_SET_TAG(psm2_tag, (uint64_t)req, tag32);
			PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE_LONG);
			args[0].u32w1 = iov[i].iov_len;
			args[1].u64 = (uint64_t)req;
			args[2].u64 = addr;
			args[3].u64 = key;
			nargs = 4;
			if (flags & FI_REMOTE_CQ_DATA) {
				PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA);
				args[4].u64 = data;
				nargs++;
			}

			/*
			 * With FI_DELIVERY_COMPLETE the force-ack bit is set and
			 * the MQ send carries no context; otherwise the MQ send
			 * is issued with the request's fi_context.
			 */
			if (flags & FI_DELIVERY_COMPLETE) {
				args[0].u32w0 |= PSMX2_AM_FORCE_ACK;
				psm2_context = NULL;
			} else {
				psm2_context = (void *)&req->fi_context;
			}

			/* Announce the transfer first, then ship the payload over MQ. */
			psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
					      args, nargs, NULL, 0, am_flags,
					      NULL, NULL);

			psm2_mq_isend2(ep_priv->domain->psm2_mq, psm2_epaddr, 0,
				       &psm2_tag, iov[i].iov_base,
				       iov[i].iov_len, psm2_context, &psm2_req);

			return 0;
		}

		/* Case 2.2: use short protocol all other segments */
		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_WRITE);
		nargs = 4;
		buf = iov[i].iov_base;
		len = iov[i].iov_len;
		/* Full-size chunks of this segment. */
		while (len > chunk_size) {
			args[0].u32w1 = chunk_size;
			args[1].u64 = (uint64_t)(uintptr_t)req;
			args[2].u64 = addr;
			args[3].u64 = key;
			psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
					      args, nargs, (void *)buf,
					      chunk_size, am_flags, NULL, NULL);
			buf += chunk_size;
			addr += chunk_size;
			len -= chunk_size;
			len_sent += chunk_size;
		}

		/* Remainder of this segment (at most chunk_size bytes). */
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		/* Only the request that completes the whole write carries EOM (and CQ data). */
		if (len_sent + len == total_len) {
			if (flags & FI_REMOTE_CQ_DATA) {
				PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_DATA | PSMX2_AM_EOM);
				args[4].u64 = data;
				nargs++;
			} else {
				PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
			}
		}
		psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER, args,
				      nargs, (void *)buf, len, am_flags, NULL,
				      NULL);

		addr += len;
		len_sent += len;
	}

	return 0;
}
/*
 * Handle the arrival of the header message of a vectored send.
 *
 * The received payload is an iov descriptor (rep->iov_info: seq_num,
 * count and per-segment lengths). For every announced segment one
 * receive is posted that matches the header's tag exactly, with the IOV
 * bit cleared and the descriptor's sequence number inserted. The
 * segments are placed back to back in the user's receive buffer; once
 * the buffer is exhausted, zero-length receives are posted for the
 * remaining segments.
 *
 * ep          - endpoint the header was received on
 * psm2_status - completion status of the header receive; its error_code
 *               is updated when this function fails
 * multi_recv  - non-zero if the user context of the completed receive is
 *               a struct psmx2_multi_recv, in which case the data lands
 *               at the current offset of that buffer
 *
 * Returns 0 on success, -FI_ENOMEM if the reply tracker cannot be
 * allocated, or the psmx2_errno() translation of the PSM2 error.
 *
 * Ownership: `rep` is referenced by every posted receive through its
 * embedded fi_context, so it is released here only when the first post
 * fails, i.e. when no receive refers to it. After a later failure the
 * already posted receives still point at it and it must stay allocated.
 *
 * NOTE(review): rep->iov_info.count comes from the peer and is not
 * validated against the capacity of iov_info.len[] here -- confirm the
 * bound is enforced elsewhere.
 */
int psmx2_handle_sendv_req(struct psmx2_fid_ep *ep,
			   psm2_mq_status2_t *psm2_status,
			   int multi_recv)
{
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	struct psmx2_sendv_reply *rep;
	struct psmx2_multi_recv *recv_req;
	struct fi_context *fi_context;
	struct fi_context *recv_context;
	int i, err;
	uint8_t *recv_buf;
	uint8_t *seg_buf;
	size_t recv_len, len;

	if (psm2_status->error_code != PSM2_OK)
		return psmx2_errno(psm2_status->error_code);

	/* Zero-initialized so a short descriptor never exposes stale heap data. */
	rep = calloc(1, sizeof(*rep));
	if (!rep) {
		psm2_status->error_code = PSM2_NO_MEMORY;
		return -FI_ENOMEM;
	}

	recv_context = psm2_status->context;
	if (multi_recv) {
		recv_req = PSMX2_CTXT_USER(recv_context);
		recv_buf = recv_req->buf + recv_req->offset;
		recv_len = recv_req->len - recv_req->offset;
		rep->multi_recv = 1;
	} else {
		recv_buf = PSMX2_CTXT_USER(recv_context);
		recv_len = PSMX2_CTXT_SIZE(recv_context);
		rep->multi_recv = 0;
	}

	/*
	 * The descriptor size is chosen by the sender; never copy more than
	 * the destination object can hold.
	 */
	memcpy(&rep->iov_info, recv_buf,
	       MIN(psm2_status->nbytes, sizeof(rep->iov_info)));

	rep->user_context = psm2_status->context;
	rep->buf = recv_buf;
	rep->no_completion = 0;
	rep->iov_done = 0;
	rep->bytes_received = 0;
	rep->msg_length = 0;
	rep->error_code = PSM2_OK;

	fi_context = &rep->fi_context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_IOV_RECV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = rep;
	PSMX2_CTXT_EP(fi_context) = ep;

	/* use the same tag, with IOV bit cleared, and seq_num added */
	psm2_tag = psm2_status->msg_tag;
	psm2_tag.tag2 &= ~PSMX2_IOV_BIT;
	PSMX2_TAG32_SET_SEQ(psm2_tag.tag2, rep->iov_info.seq_num);

	rep->comp_flag = (psm2_tag.tag2 & PSMX2_MSG_BIT) ? FI_MSG : FI_TAGGED;
	if (psm2_tag.tag2 & PSMX2_IMM_BIT)
		rep->comp_flag |= FI_REMOTE_CQ_DATA;

	/* match every bit of the tag */
	PSMX2_SET_TAG(psm2_tagsel, -1UL, -1);

	for (i=0; i<rep->iov_info.count; i++) {
		if (recv_len) {
			seg_buf = recv_buf;
			len = MIN(recv_len, rep->iov_info.len[i]);
		} else {
			/* recv buffer full, post empty recvs */
			seg_buf = NULL;
			len = 0;
		}

		err = psm2_mq_irecv2(ep->domain->psm2_mq,
				     psm2_status->msg_peer,
				     &psm2_tag, &psm2_tagsel,
				     0/*flag*/, seg_buf, len,
				     (void *)fi_context, &psm2_req);
		if (err) {
			psm2_status->error_code = err;
			/* Nothing has been posted yet, so nobody else owns rep. */
			if (i == 0)
				free(rep);
			return psmx2_errno(err);
		}

		recv_buf += len;
		recv_len -= len;
	}

	return 0;
}
/*
 * RMA read: fetch `len` bytes from the remote region (addr, key) on the
 * peer src_addr into the local buffer `buf`.
 *
 * Paths, in the order they are tried:
 *  - FI_TRIGGER set: record the arguments (with FI_TRIGGER cleared) in a
 *    psmx2_trigger attached to the counter named by the
 *    fi_triggered_context passed as `context`, and return 0.
 *  - The resolved peer has the same epid as this domain: delegate to
 *    psmx2_rma_self() and return its result.
 *  - psmx2_env.tagged_rma is set and len exceeds
 *    psmx2_am_param.max_reply_short: post one MQ receive tagged with the
 *    request pointer, then send a PSMX2_AM_REQ_READ_LONG request.
 *  - Otherwise: send one PSMX2_AM_REQ_READ request per chunk of at most
 *    max_reply_short bytes; the last one carries PSMX2_AM_EOM.
 *
 * Returns 0 once all requests have been issued, -FI_EINVAL for a NULL
 * buffer or unusable src_addr, -FI_ENOMEM on allocation failure, the
 * value of psmx2_rma_self() for the loopback path, or the psmx2_errno()
 * translation of a PSM2 submission error.
 *
 * Ownership of `req` on failure: it is freed only while nothing else can
 * refer to it, i.e. before any receive has been posted with its
 * fi_context and before any request carrying its address has been sent.
 * After that point it is left allocated because the posted receive or
 * the peer's reply may still reference it.
 */
ssize_t psmx2_read_generic(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr,
			   uint64_t addr, uint64_t key, void *context,
			   uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	struct psmx2_epaddr_context *epaddr_context;
	struct psmx2_am_request *req;
	psm2_amarg_t args[8];
	int chunk_size;
	size_t offset = 0;
	psm2_epaddr_t psm2_epaddr;
	uint8_t vlane;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	uint32_t tag32;
	size_t idx;
	int err;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	/* Deferred execution: capture the arguments and queue them on the counter. */
	if (flags & FI_TRIGGER) {
		struct psmx2_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX2_TRIGGERED_READ;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx2_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->read.ep = ep;
		trigger->read.buf = buf;
		trigger->read.len = len;
		trigger->read.desc = desc;
		trigger->read.src_addr = src_addr;
		trigger->read.addr = addr;
		trigger->read.key = key;
		trigger->read.context = context;
		/* FI_TRIGGER is cleared so the replayed call takes the immediate path. */
		trigger->read.flags = flags & ~FI_TRIGGER;

		psmx2_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (!buf)
		return -FI_EINVAL;

	/*
	 * Resolve src_addr: with an FI_AV_TABLE address vector it is an index
	 * into the AV arrays; otherwise the PSM2 epaddr and vlane are decoded
	 * from the fi_addr_t value itself.
	 */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = src_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm2_epaddr = av->epaddrs[idx];
		vlane = av->vlanes[idx];
	} else {
		if (!src_addr)
			return -FI_EINVAL;

		psm2_epaddr = PSMX2_ADDR_TO_EP(src_addr);
		vlane = PSMX2_ADDR_TO_VL(src_addr);
	}

	/* Source is an endpoint of this same domain: use the loopback helper. */
	epaddr_context = psm2_epaddr_getctxt((void *)psm2_epaddr);
	if (epaddr_context->epid == ep_priv->domain->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_READ, ep_priv,
				      ep_priv->domain->eps[vlane],
				      buf, len, desc, addr, key,
				      context, flags, 0);

	req = calloc(1, sizeof(*req));
	if (!req)
		return -FI_ENOMEM;

	req->op = PSMX2_AM_REQ_READ;
	req->read.buf = buf;
	req->read.len = len;
	req->read.addr = addr;	/* needed? */
	req->read.key = key;	/* needed? */
	req->read.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_READ | FI_RMA;
	PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_READ_CONTEXT;
	PSMX2_CTXT_USER(&req->fi_context) = context;
	PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

	/* Selective completion without FI_COMPLETION: no event for this read. */
	if (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)) {
		PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_NOCOMP_READ_CONTEXT;
		req->no_event = 1;
	}

	chunk_size = psmx2_am_param.max_reply_short;

	args[0].u32w0 = 0;
	PSMX2_AM_SET_SRC(args[0].u32w0, ep_priv->vlane);
	PSMX2_AM_SET_DST(args[0].u32w0, vlane);

	/* Long protocol: the data comes back as one tagged MQ message. */
	if (psmx2_env.tagged_rma && len > chunk_size) {
		/* The request pointer doubles as the tag the reply is matched on. */
		tag32 = PSMX2_TAG32(PSMX2_RMA_BIT, vlane, ep_priv->vlane);
		PSMX2_SET_TAG(psm2_tag, (uint64_t)(uintptr_t)req, tag32);
		PSMX2_SET_TAG(psm2_tagsel, -1ULL, -1);
		err = psm2_mq_irecv2(ep_priv->domain->psm2_mq, psm2_epaddr,
				     &psm2_tag, &psm2_tagsel, 0, buf, len,
				     (void *)&req->fi_context, &psm2_req);
		if (err) {
			/* Receive was not posted; req is still private to us. */
			free(req);
			return psmx2_errno(err);
		}

		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ_LONG);
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		err = psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
					    args, 4, NULL, 0, 0, NULL, NULL);
		if (err) {
			/* The posted receive still refers to req: do not free. */
			return psmx2_errno(err);
		}

		return 0;
	}

	/*
	 * Short protocol. AM arguments: [0] vlanes, opcode, flags and chunk
	 * length, [1] request pointer, [2] remote address of the chunk,
	 * [3] key, [4] offset of the chunk within the local buffer.
	 */
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ);
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[3].u64 = key;
	while (len > chunk_size) {
		args[0].u32w1 = chunk_size;
		args[2].u64 = addr;
		args[4].u64 = offset;
		err = psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
					    args, 5, NULL, 0, 0, NULL, NULL);
		if (err) {
			/* offset == 0 means no request carrying req went out. */
			if (offset == 0)
				free(req);
			return psmx2_errno(err);
		}
		addr += chunk_size;
		len -= chunk_size;
		offset += chunk_size;
	}

	/* Final (or only) chunk carries the end-of-message flag. */
	PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
	args[0].u32w1 = len;
	args[2].u64 = addr;
	args[4].u64 = offset;
	err = psm2_am_request_short(psm2_epaddr, PSMX2_AM_RMA_HANDLER,
				    args, 5, NULL, 0, 0, NULL, NULL);
	if (err) {
		/* Earlier chunks may already be in flight and refer to req. */
		if (offset == 0)
			free(req);
		return psmx2_errno(err);
	}

	return 0;
}
int psmx2_ep_open_internal(struct psmx2_fid_domain *domain_priv, struct fi_info *info, struct psmx2_fid_ep **ep_out, void *context, struct psmx2_trx_ctxt *trx_ctxt) { struct psmx2_fid_ep *ep_priv; uint64_t ep_cap; int err = -FI_EINVAL; if (info) ep_cap = info->caps; else ep_cap = FI_TAGGED; if (info && info->ep_attr && info->ep_attr->auth_key) { if (info->ep_attr->auth_key_size != sizeof(psm2_uuid_t)) { FI_WARN(&psmx2_prov, FI_LOG_EP_CTRL, "Invalid auth_key_len %"PRIu64 ", should be %"PRIu64".\n", info->ep_attr->auth_key_size, sizeof(psm2_uuid_t)); goto errout; } if (memcmp(domain_priv->fabric->uuid, info->ep_attr->auth_key, sizeof(psm2_uuid_t))) { FI_WARN(&psmx2_prov, FI_LOG_EP_CTRL, "Invalid auth_key: %s\n", psmx2_uuid_to_string((void *)info->ep_attr->auth_key)); goto errout; } } ep_priv = (struct psmx2_fid_ep *) calloc(1, sizeof *ep_priv); if (!ep_priv) { err = -FI_ENOMEM; goto errout; } ep_priv->ep.fid.fclass = FI_CLASS_EP; ep_priv->ep.fid.context = context; ep_priv->ep.fid.ops = &psmx2_fi_ops; ep_priv->ep.ops = &psmx2_ep_ops; ep_priv->ep.cm = &psmx2_cm_ops; ep_priv->domain = domain_priv; ep_priv->rx = trx_ctxt; if (!(info && info->ep_attr && info->ep_attr->tx_ctx_cnt == FI_SHARED_CONTEXT)) ep_priv->tx = trx_ctxt; ofi_atomic_initialize32(&ep_priv->ref, 0); PSMX2_CTXT_TYPE(&ep_priv->nocomp_send_context) = PSMX2_NOCOMP_SEND_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv; PSMX2_CTXT_TYPE(&ep_priv->nocomp_tsend_context) = PSMX2_NOCOMP_TSEND_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_tsend_context) = ep_priv; if (ep_cap & FI_TAGGED) ep_priv->ep.tagged = &psmx2_tagged_ops; if (ep_cap & FI_MSG) ep_priv->ep.msg = &psmx2_msg_ops; if (ep_cap & FI_RMA) ep_priv->ep.rma = &psmx2_rma_ops; if (ep_cap & FI_ATOMICS) ep_priv->ep.atomic = &psmx2_atomic_ops; ep_priv->caps = ep_cap; err = psmx2_domain_enable_ep(domain_priv, ep_priv); if (err) goto errout_free_ep; psmx2_domain_acquire(domain_priv); if (info) { if (info->tx_attr) ep_priv->tx_flags = 
info->tx_attr->op_flags; if (info->rx_attr) ep_priv->rx_flags = info->rx_attr->op_flags; } psmx2_ep_optimize_ops(ep_priv); PSMX2_EP_INIT_OP_CONTEXT(ep_priv); *ep_out = ep_priv; return 0; errout_free_ep: free(ep_priv); errout: return err; }
/*
 * Send `len` bytes from `buf` as an untagged message to dest_addr.
 *
 *  - FI_TRIGGER: the call is forwarded unchanged to
 *    psmx2_trigger_queue_send() and its result is returned.
 *  - FI_INJECT: the message is sent with the blocking psm2_mq_send2();
 *    lengths above psmx2_env.inject_size give -FI_EMSGSIZE. On success
 *    the send counter (if bound) is incremented and, unless completions
 *    are suppressed, an event is created and queued on the send CQ.
 *  - Otherwise the message is posted with psm2_mq_isend2(), using either
 *    the endpoint's shared no-completion context or the caller's
 *    `context`, which is then filled in as a PSMX2_SEND_CONTEXT.
 *
 * `data` goes into the message tag, together with the immediate-data
 * bit when FI_REMOTE_CQ_DATA is set in `flags`.
 *
 * Returns 0 on success, -FI_EMSGSIZE, -FI_ENOMEM (CQ event allocation),
 * or the psmx2_errno() translation of a PSM2 error.
 */
ssize_t psmx2_send_generic(struct fid_ep *ep, const void *buf, size_t len,
			   void *desc, fi_addr_t dest_addr, void *context,
			   uint64_t flags, uint64_t data)
{
	struct psmx2_fid_ep *sender = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *addr_vec;
	struct psmx2_cq_event *cq_event;
	struct fi_context *op_ctxt;
	psm2_epaddr_t peer;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t msg_tag;
	const int mq_flags = 0;
	const int has_imm = (flags & FI_REMOTE_CQ_DATA) > 0;
	int suppress_event;
	int rc;

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_send(ep, buf, len, desc, dest_addr,
						context, flags, data);

	addr_vec = sender->av;
	assert(addr_vec);
	peer = psmx2_av_translate_addr(addr_vec, sender->tx, dest_addr,
				       addr_vec->type);

	PSMX2_SET_TAG(msg_tag, 0, data,
		      PSMX2_TYPE_MSG | PSMX2_IMM_BIT_SET(has_imm));

	/* No event when asked explicitly, or under selective completion
	 * without FI_COMPLETION. */
	suppress_event = (flags & PSMX2_NO_COMPLETION) ||
			 (sender->send_selective_completion &&
			  !(flags & FI_COMPLETION));

	if (flags & FI_INJECT) {
		if (len > psmx2_env.inject_size)
			return -FI_EMSGSIZE;

		rc = psm2_mq_send2(sender->tx->psm2_mq, peer, mq_flags,
				   &msg_tag, buf, len);
		if (rc != PSM2_OK)
			return psmx2_errno(rc);

		if (sender->send_cntr)
			psmx2_cntr_inc(sender->send_cntr, 0);

		/* Nothing to report when there is no CQ or events are off. */
		if (!sender->send_cq || suppress_event)
			return 0;

		cq_event = psmx2_cq_create_event(sender->send_cq,
						 context, (void *)buf, flags, len,
						 (uint64_t) data,
						 0 /* tag */,
						 0 /* olen */,
						 0 /* err */);
		if (!cq_event)
			return -FI_ENOMEM;

		psmx2_cq_enqueue_event(sender->send_cq, cq_event);
		return 0;
	}

	if (suppress_event) {
		op_ctxt = &sender->nocomp_send_context;
	} else {
		assert(context);
		op_ctxt = context;
		PSMX2_CTXT_TYPE(op_ctxt) = PSMX2_SEND_CONTEXT;
		PSMX2_CTXT_USER(op_ctxt) = (void *)buf;
		PSMX2_CTXT_EP(op_ctxt) = sender;
	}

	rc = psm2_mq_isend2(sender->tx->psm2_mq, peer, mq_flags, &msg_tag,
			    buf, len, (void *)op_ctxt, &mq_req);
	if (rc != PSM2_OK)
		return psmx2_errno(rc);

	/* The PSM2 request handle is recorded only in the caller's context. */
	if (op_ctxt == context)
		PSMX2_CTXT_REQ(op_ctxt) = mq_req;

	return 0;
}