int psmx2_am_process_rma(struct psmx2_trx_ctxt *trx_ctxt, struct psmx2_am_request *req) { int err; psm2_mq_req_t psm2_req; psm2_mq_tag_t psm2_tag, psm2_tagsel; if ((req->op & PSMX2_AM_OP_MASK) == PSMX2_AM_REQ_WRITE_LONG) { PSMX2_SET_TAG(psm2_tag, (uint64_t)req->write.context, 0, PSMX2_RMA_TYPE_WRITE); PSMX2_SET_MASK(psm2_tagsel, PSMX2_MATCH_ALL, PSMX2_RMA_TYPE_MASK); err = psm2_mq_irecv2(trx_ctxt->psm2_mq, (psm2_epaddr_t)req->write.peer_addr, &psm2_tag, &psm2_tagsel, 0, (void *)req->write.addr, req->write.len, (void *)&req->fi_context, &psm2_req); } else { PSMX2_SET_TAG(psm2_tag, (uint64_t)req->read.context, 0, PSMX2_RMA_TYPE_READ); err = psm2_mq_isend2(trx_ctxt->psm2_mq, (psm2_epaddr_t)req->read.peer_addr, 0, &psm2_tag, (void *)req->read.addr, req->read.len, (void *)&req->fi_context, &psm2_req); } return psmx2_errno(err); }
/*
 * Issue an RMA read of the remote region (addr, key) at src_addr into the
 * local buffers described by iov[0..count-1].
 *
 * Steps performed below:
 *  - A request carrying FI_TRIGGER is handed to psmx2_trigger_queue_readv().
 *  - src_addr is turned into a PSM2 endpoint address: through
 *    psmx2_av_translate_sep() when PSMX2_SEP_ADDR_TEST() holds, through the
 *    AV table when the AV type is FI_AV_TABLE, and through
 *    PSMX2_ADDR_TO_EP() otherwise.
 *  - If the peer's epid equals this endpoint's TX epid, the work is done by
 *    psmx2_rma_self().
 *  - Otherwise an AM request is allocated, a heap copy of the iovec array is
 *    attached to it (req->tmpbuf / req->iov), and PSMX2_AM_REQ_READ requests
 *    of at most max_reply_short bytes each are sent.  With
 *    psmx2_env.tagged_rma set, a last non-empty segment larger than that
 *    limit is instead covered by a pre-posted tagged receive plus one
 *    PSMX2_AM_REQ_READ_LONG request.
 *
 * The return codes of psm2_am_request_short() and psm2_mq_irecv2() are not
 * inspected.
 *
 * Returns 0 after the requests were issued, -FI_EINVAL for a zero src_addr
 * on the direct-address path, -FI_ENOMEM when an allocation fails, or
 * whatever psmx2_av_check_table_idx(), psmx2_rma_self() or
 * psmx2_trigger_queue_readv() report.
 */
ssize_t psmx2_readv_generic(struct fid_ep *ep, const struct iovec *iov,
			    void *desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context,
			    uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *av;
	struct psmx2_epaddr_context *peer_ctxt;
	struct psmx2_am_request *req;
	psm2_amarg_t args[8];
	psm2_epaddr_t peer;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t tag, tagsel;
	void *tail_buf = NULL;
	size_t tail_len = 0;
	size_t total_len = 0;
	size_t head_len;
	size_t local_off = 0;
	size_t iov_bytes;
	size_t tbl_idx;
	size_t n;
	int max_short;
	int i;
	int err;

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_readv(ep, iov, desc, count,
						 src_addr, addr, key,
						 context, flags);

	/* Resolve the libfabric address into a PSM2 endpoint address. */
	av = ep_priv->av;
	if (av && PSMX2_SEP_ADDR_TEST(src_addr)) {
		peer = psmx2_av_translate_sep(av, ep_priv->tx, src_addr);
	} else if (av && av->type == FI_AV_TABLE) {
		tbl_idx = src_addr;
		err = psmx2_av_check_table_idx(av, ep_priv->tx, tbl_idx);
		if (err)
			return err;
		peer = av->tables[ep_priv->tx->id].epaddrs[tbl_idx];
	} else {
		if (!src_addr)
			return -FI_EINVAL;
		peer = PSMX2_ADDR_TO_EP(src_addr);
	}

	/* Reading from ourselves does not go through active messages. */
	peer_ctxt = psm2_epaddr_getctxt((void *)peer);
	if (peer_ctxt->epid == ep_priv->tx->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_READV, ep_priv,
				      (void *)iov, count, desc, addr, key,
				      context, flags, 0);

	for (n = 0; n < count; n++)
		total_len += iov[n].iov_len;

	req = psmx2_am_request_alloc(ep_priv->tx);
	if (!req)
		return -FI_ENOMEM;

	/* The request keeps its own copy of the caller's iovec array. */
	iov_bytes = count * sizeof(struct iovec);
	req->tmpbuf = malloc(iov_bytes);
	if (!req->tmpbuf) {
		psmx2_am_request_free(ep_priv->tx, req);
		return -FI_ENOMEM;
	}
	req->iov = req->tmpbuf;
	memcpy(req->iov, iov, iov_bytes);

	req->op = PSMX2_AM_REQ_READV;
	req->read.iov_count = count;
	req->read.len = total_len;
	req->read.addr = addr;	/* needed? */
	req->read.key = key;	/* needed? */
	req->read.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_READ | FI_RMA;

	PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_READ_CONTEXT;
	PSMX2_CTXT_USER(&req->fi_context) = context;
	PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

	/* Selective completion without FI_COMPLETION: mark as no-event. */
	if (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)) {
		PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_NOCOMP_READ_CONTEXT;
		req->no_event = 1;
	}

	max_short = ep_priv->tx->psm2_am_param.max_reply_short;

	if (psmx2_env.tagged_rma) {
		/*
		 * Walk backwards past empty trailing segments; the first
		 * non-empty one becomes the "long" tail only if it exceeds
		 * the short-reply limit.
		 */
		for (i = count - 1; i >= 0; i--) {
			if (!iov[i].iov_len)
				continue;
			if (iov[i].iov_len > max_short) {
				tail_buf = iov[i].iov_base;
				tail_len = iov[i].iov_len;
			}
			break;
		}
	}

	/* Everything except the long tail goes through the short protocol. */
	head_len = total_len - tail_len;

	args[0].u32w0 = 0;
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ);
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[3].u64 = key;

	for (; head_len > max_short;
	     addr += max_short, head_len -= max_short, local_off += max_short) {
		args[0].u32w1 = max_short;
		args[2].u64 = addr;
		args[4].u64 = local_off;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
				      NULL, 0, 0, NULL, NULL);
	}

	/* The final short request carries EOM unless a long tail follows. */
	if (!tail_len)
		PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
	args[0].u32w1 = head_len;
	args[2].u64 = addr;
	args[4].u64 = local_off;
	psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
			      NULL, 0, 0, NULL, NULL);

	if (!tail_len)
		return 0;

	/*
	 * Long protocol for the tail: post the matching receive first, then
	 * ask the peer to send.  args[] is reused, so args[0].u32w0 still
	 * holds what the short path left there before the opcode is set.
	 */
	PSMX2_SET_TAG(tag, (uint64_t)req, 0, PSMX2_RMA_TYPE_READ);
	PSMX2_SET_MASK(tagsel, PSMX2_MATCH_ALL, PSMX2_RMA_TYPE_MASK);
	psm2_mq_irecv2(ep_priv->tx->psm2_mq, peer, &tag, &tagsel, 0,
		       tail_buf, tail_len,
		       (void *)&req->fi_context, &mq_req);

	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ_LONG);
	args[0].u32w1 = tail_len;
	args[1].u64 = (uint64_t)req;
	args[2].u64 = addr + head_len;
	args[3].u64 = key;
	psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 4,
			      NULL, 0, 0, NULL, NULL);

	return 0;
}
/*
 * Issue an RMA read of len bytes from the remote region (addr, key) at
 * src_addr into the local buffer buf.
 *
 * Steps performed below:
 *  - A request carrying FI_TRIGGER is handed to psmx2_trigger_queue_read().
 *  - A NULL buf is rejected with -FI_EINVAL.
 *  - src_addr is turned into a PSM2 endpoint address: through
 *    psmx2_av_translate_sep() when PSMX2_SEP_ADDR_TEST() holds, through the
 *    AV table when the AV type is FI_AV_TABLE, and through
 *    PSMX2_ADDR_TO_EP() otherwise.
 *  - If the peer's epid equals this endpoint's TX epid, the work is done by
 *    psmx2_rma_self().
 *  - With psmx2_env.tagged_rma set and len above max_reply_short, a tagged
 *    receive for the whole buffer is posted and a single
 *    PSMX2_AM_REQ_READ_LONG request is sent.
 *  - Otherwise the transfer is split into PSMX2_AM_REQ_READ requests of at
 *    most max_reply_short bytes; the last one carries PSMX2_AM_EOM.
 *
 * The return codes of psm2_am_request_short() and psm2_mq_irecv2() are not
 * inspected.
 *
 * Returns 0 after the requests were issued, -FI_EINVAL for a NULL buf or a
 * zero src_addr on the direct-address path, -FI_ENOMEM when no request
 * object is available, or whatever psmx2_av_check_table_idx(),
 * psmx2_rma_self() or psmx2_trigger_queue_read() report.
 */
ssize_t psmx2_read_generic(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr,
			   uint64_t addr, uint64_t key, void *context,
			   uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv = container_of(ep, struct psmx2_fid_ep, ep);
	struct psmx2_fid_av *av;
	struct psmx2_epaddr_context *peer_ctxt;
	struct psmx2_am_request *req;
	psm2_amarg_t args[8];
	psm2_epaddr_t peer;
	psm2_mq_req_t mq_req;
	psm2_mq_tag_t tag, tagsel;
	size_t local_off = 0;
	size_t tbl_idx;
	int max_short;
	int err;

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_read(ep, buf, len, desc, src_addr,
						addr, key, context, flags);

	if (!buf)
		return -FI_EINVAL;

	/* Resolve the libfabric address into a PSM2 endpoint address. */
	av = ep_priv->av;
	if (av && PSMX2_SEP_ADDR_TEST(src_addr)) {
		peer = psmx2_av_translate_sep(av, ep_priv->tx, src_addr);
	} else if (av && av->type == FI_AV_TABLE) {
		tbl_idx = src_addr;
		err = psmx2_av_check_table_idx(av, ep_priv->tx, tbl_idx);
		if (err)
			return err;
		peer = av->tables[ep_priv->tx->id].epaddrs[tbl_idx];
	} else {
		if (!src_addr)
			return -FI_EINVAL;
		peer = PSMX2_ADDR_TO_EP(src_addr);
	}

	/* Reading from ourselves does not go through active messages. */
	peer_ctxt = psm2_epaddr_getctxt((void *)peer);
	if (peer_ctxt->epid == ep_priv->tx->psm2_epid)
		return psmx2_rma_self(PSMX2_AM_REQ_READ, ep_priv, buf, len,
				      desc, addr, key, context, flags, 0);

	req = psmx2_am_request_alloc(ep_priv->tx);
	if (!req)
		return -FI_ENOMEM;

	req->op = PSMX2_AM_REQ_READ;
	req->read.buf = buf;
	req->read.len = len;
	req->read.addr = addr;	/* needed? */
	req->read.key = key;	/* needed? */
	req->read.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_READ | FI_RMA;

	PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_READ_CONTEXT;
	PSMX2_CTXT_USER(&req->fi_context) = context;
	PSMX2_CTXT_EP(&req->fi_context) = ep_priv;

	/* Selective completion without FI_COMPLETION: mark as no-event. */
	if (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)) {
		PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_NOCOMP_READ_CONTEXT;
		req->no_event = 1;
	}

	max_short = ep_priv->tx->psm2_am_param.max_reply_short;
	args[0].u32w0 = 0;

	if (psmx2_env.tagged_rma && len > max_short) {
		/*
		 * Long protocol: post the matching receive for the whole
		 * buffer, then ask the peer to send it.
		 */
		PSMX2_SET_TAG(tag, (uint64_t)req, 0, PSMX2_RMA_TYPE_READ);
		PSMX2_SET_MASK(tagsel, PSMX2_MATCH_ALL, PSMX2_RMA_TYPE_MASK);
		psm2_mq_irecv2(ep_priv->tx->psm2_mq, peer, &tag, &tagsel, 0,
			       buf, len,
			       (void *)&req->fi_context, &mq_req);

		PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ_LONG);
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 4,
				      NULL, 0, 0, NULL, NULL);
		return 0;
	}

	/* Short protocol: one request per chunk of at most max_short bytes. */
	PSMX2_AM_SET_OP(args[0].u32w0, PSMX2_AM_REQ_READ);
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[3].u64 = key;

	for (; len > max_short;
	     addr += max_short, len -= max_short, local_off += max_short) {
		args[0].u32w1 = max_short;
		args[2].u64 = addr;
		args[4].u64 = local_off;
		psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
				      NULL, 0, 0, NULL, NULL);
	}

	/* Remainder (possibly the whole transfer) closes the sequence. */
	PSMX2_AM_SET_FLAG(args[0].u32w0, PSMX2_AM_EOM);
	args[0].u32w1 = len;
	args[2].u64 = addr;
	args[4].u64 = local_off;
	psm2_am_request_short(peer, PSMX2_AM_RMA_HANDLER, args, 5,
			      NULL, 0, 0, NULL, NULL);

	return 0;
}
/*
 * Handle the arrival of a "sendv" request header: copy the IOV description
 * out of the receive buffer and post one receive per announced payload
 * segment.
 *
 * ep          endpoint whose RX message queue is used for the new receives
 * status      completion status of the header receive; its error field is
 *             overwritten when this function fails
 * multi_recv  non-zero when the header landed in a multi-recv buffer; the
 *             user pointer of the receive context is then a
 *             struct psmx2_multi_recv, otherwise it is the buffer itself
 *
 * A struct psmx2_sendv_reply is heap-allocated here and its embedded
 * fi_context (type PSMX2_IOV_RECV_CONTEXT, user pointer = the reply) is
 * passed as the context of every posted receive.  This function does not
 * free the reply on success.
 *
 * Payload segments are received back to back into the same buffer the header
 * arrived in.  Once that buffer is exhausted, zero-length receives with a
 * NULL buffer are posted for the remaining segments.
 *
 * Returns 0 on success, -FI_ENOMEM if the reply cannot be allocated, or
 * psmx2_errno() of the status error / failed post.
 */
int psmx2_handle_sendv_req(struct psmx2_fid_ep *ep,
			   PSMX2_STATUS_TYPE *status,
			   int multi_recv)
{
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	struct psmx2_sendv_reply *rep;
	struct psmx2_multi_recv *recv_req = NULL;
	struct fi_context *fi_context;
	struct fi_context *recv_context;
	int i, err;
	uint8_t *recv_buf;
	void *post_buf;
	size_t recv_len, len;

	if (PSMX2_STATUS_ERROR(status) != PSM2_OK)
		return psmx2_errno(PSMX2_STATUS_ERROR(status));

	rep = malloc(sizeof(*rep));
	if (!rep) {
		PSMX2_STATUS_ERROR(status) = PSM2_NO_MEMORY;
		return -FI_ENOMEM;
	}

	recv_context = PSMX2_STATUS_CONTEXT(status);
	if (multi_recv) {
		/* Continue at the current fill position of the multi-recv buffer. */
		recv_req = PSMX2_CTXT_USER(recv_context);
		recv_buf = recv_req->buf + recv_req->offset;
		recv_len = recv_req->len - recv_req->offset;
		rep->multi_recv = 1;
	} else {
		recv_buf = PSMX2_CTXT_USER(recv_context);
		recv_len = PSMX2_CTXT_SIZE(recv_context);
		rep->multi_recv = 0;
	}

	/*
	 * NOTE(review): the copy length comes straight from the status and is
	 * not checked against sizeof(rep->iov_info) here (the assert below is
	 * disabled) -- confirm it is bounded elsewhere.
	 */
	/* assert(PSMX2_STATUS_RCVLEN(status) <= PSMX2_IOV_BUF_SIZE); */
	memcpy(&rep->iov_info, recv_buf, PSMX2_STATUS_RCVLEN(status));

	rep->user_context = PSMX2_STATUS_CONTEXT(status);
	rep->tag = PSMX2_STATUS_TAG(status);
	rep->buf = recv_buf;
	rep->no_completion = 0;
	rep->iov_done = 0;
	rep->bytes_received = 0;
	rep->msg_length = 0;
	rep->error_code = PSM2_OK;

	fi_context = &rep->fi_context;
	PSMX2_CTXT_TYPE(fi_context) = PSMX2_IOV_RECV_CONTEXT;
	PSMX2_CTXT_USER(fi_context) = rep;
	PSMX2_CTXT_EP(fi_context) = ep;

	rep->comp_flag = PSMX2_IS_MSG(PSMX2_GET_FLAGS(rep->tag)) ? FI_MSG : FI_TAGGED;
	if (PSMX2_HAS_IMM(PSMX2_GET_FLAGS(rep->tag)))
		rep->comp_flag |= FI_REMOTE_CQ_DATA;

	/* IOV payload uses a sequence number in place of a tag. */
	PSMX2_SET_TAG(psm2_tag, rep->iov_info.seq_num, 0, PSMX2_TYPE_IOV_PAYLOAD);
	PSMX2_SET_MASK(psm2_tagsel, PSMX2_MATCH_ALL, PSMX2_TYPE_MASK);

	for (i = 0; i < rep->iov_info.count; i++) {
		if (recv_len) {
			post_buf = recv_buf;
			len = MIN(recv_len, rep->iov_info.len[i]);
		} else {
			/* recv buffer full, post empty recvs */
			post_buf = NULL;
			len = 0;
		}

		err = psm2_mq_irecv2(ep->rx->psm2_mq,
				     PSMX2_STATUS_PEER(status),
				     &psm2_tag, &psm2_tagsel, 0/*flag*/,
				     post_buf, len,
				     (void *)fi_context, &psm2_req);
		if (err) {
			PSMX2_STATUS_ERROR(status) = err;
			/*
			 * If the very first post failed, no outstanding
			 * receive refers to the reply, so release it instead
			 * of leaking it.  After a later failure the receives
			 * already posted still carry &rep->fi_context, so the
			 * reply has to stay allocated.
			 */
			if (i == 0)
				free(rep);
			return psmx2_errno(err);
		}

		recv_buf += len;
		recv_len -= len;
	}

	/* Not enough room left for another message: flag the buffer as consumed. */
	if (multi_recv && recv_len < recv_req->min_buf_size)
		rep->comp_flag |= FI_MULTI_RECV;

	return 0;
}
/*
 * Post a receive for an untagged message on the endpoint's RX queue.
 *
 * Steps performed below:
 *  - A request carrying FI_TRIGGER is handed to psmx2_trigger_queue_recv().
 *  - When the endpoint has FI_DIRECTED_RECV and src_addr is not
 *    FI_ADDR_UNSPEC, src_addr is translated through the AV and used as the
 *    source filter; otherwise the source passed to PSM2 is 0.
 *  - With completions enabled (no selective completion, or FI_COMPLETION
 *    given) the caller's context is used as the operation context.  For
 *    FI_MULTI_RECV a struct psmx2_multi_recv describing the buffer is
 *    allocated and stored as the context's user pointer, and the length of
 *    the posted receive is capped at PSMX2_MAX_MSG_SIZE.
 *  - With completions disabled an operation context is obtained via
 *    PSMX2_EP_GET_OP_CONTEXT() or, when PSMX2_USE_REQ_CONTEXT is set, via
 *    PSMX2_REQ_GET_OP_CONTEXT() after the post, and is marked
 *    PSMX2_NOCOMP_RECV_CONTEXT.
 *
 * If the post fails, a multi-recv descriptor allocated by this call is
 * released again and the user pointer in the caller's context is cleared.
 *
 * Returns 0 on success, -FI_ENOMEM if the multi-recv descriptor cannot be
 * allocated, psmx2_errno() of a failed psm2_mq_irecv2(), or the result of
 * psmx2_trigger_queue_recv().
 */
ssize_t psmx2_recv_generic(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, void *context,
			   uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	struct psmx2_multi_recv *multi_req = NULL;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	struct fi_context *fi_context;
	int recv_flag = 0;
	int err;
	int enable_completion;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_recv(ep, buf, len, desc, src_addr,
						context, flags);

	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		assert(av);
		psm2_epaddr = psmx2_av_translate_addr(av, ep_priv->rx, src_addr, av->type);
	} else {
		psm2_epaddr = 0;
	}

	PSMX2_SET_TAG(psm2_tag, 0ULL, 0, PSMX2_TYPE_MSG);
	PSMX2_SET_MASK(psm2_tagsel, PSMX2_MATCH_NONE, PSMX2_TYPE_MASK);

	enable_completion = !ep_priv->recv_selective_completion ||
			    (flags & FI_COMPLETION);
	if (enable_completion) {
		assert(context);
		fi_context = context;
		if (flags & FI_MULTI_RECV) {
			multi_req = calloc(1, sizeof(*multi_req));
			if (!multi_req)
				return -FI_ENOMEM;

			multi_req->src_addr = psm2_epaddr;
			multi_req->tag = psm2_tag;
			multi_req->tagsel = psm2_tagsel;
			multi_req->flag = recv_flag;
			multi_req->buf = buf;
			multi_req->len = len;
			multi_req->offset = 0;
			multi_req->min_buf_size = ep_priv->min_multi_recv;
			multi_req->context = fi_context;
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_MULTI_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = multi_req;
			/* The descriptor keeps the full length; only the post is capped. */
			if (len > PSMX2_MAX_MSG_SIZE)
				len = PSMX2_MAX_MSG_SIZE;
		} else {
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = buf;
		}
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_SIZE(fi_context) = len;
	} else {
		PSMX2_EP_GET_OP_CONTEXT(ep_priv, fi_context);
#if !PSMX2_USE_REQ_CONTEXT
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
#endif
	}

	err = psm2_mq_irecv2(ep_priv->rx->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, recv_flag, buf, len,
			     (void *)fi_context, &psm2_req);
	if (OFI_UNLIKELY(err != PSM2_OK)) {
		/*
		 * The receive was never posted, so nothing will ever hand the
		 * multi-recv descriptor back: release it here and drop the
		 * now-stale pointer from the caller's context.
		 *
		 * NOTE(review): an operation context obtained through
		 * PSMX2_EP_GET_OP_CONTEXT() above is not returned on this
		 * path -- confirm whether it needs to be released as well.
		 */
		if (multi_req) {
			PSMX2_CTXT_USER(fi_context) = NULL;
			free(multi_req);
		}
		return psmx2_errno(err);
	}

	if (enable_completion) {
		PSMX2_CTXT_REQ(fi_context) = psm2_req;
	} else {
#if PSMX2_USE_REQ_CONTEXT
		PSMX2_REQ_GET_OP_CONTEXT(psm2_req, fi_context);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
#endif
	}

	return 0;
}