/*
 * Tagged-send fast path for endpoints whose address vector is a table.
 *
 * dest_addr is interpreted as an index into the endpoint's AV; an index at or
 * beyond av->last is rejected with -FI_EINVAL.  The domain's reserved tag bits
 * are cleared from the user tag before the message is handed to PSM.  The PSM
 * request is posted with the endpoint's shared nocomp_send_context rather than
 * the caller-supplied context; desc and context are not used here.
 *
 * Returns 0 when the send was posted, otherwise the value of psmx_errno() for
 * the PSM error.
 */
ssize_t psmx_tagged_send_no_event_av_table(struct fid_ep *ep, const void *buf,
					   size_t len, void *desc,
					   fi_addr_t dest_addr, uint64_t tag,
					   void *context)
{
	struct psmx_fid_ep *priv = container_of(ep, struct psmx_fid_ep, ep);
	struct psmx_fid_av *table = priv->av;
	size_t slot = (size_t)dest_addr;
	psm_epaddr_t peer;
	psm_mq_req_t mq_req;
	uint64_t wire_tag;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t wire_tag2;
#endif
	struct fi_context *req_ctxt;
	int rc;

	/* Bounds-check before touching the address table. */
	if (slot >= table->last)
		return -FI_EINVAL;
	peer = table->psm_epaddrs[slot];

	/* Strip the bits the provider keeps for itself. */
	wire_tag = tag & (~priv->domain->reserved_tag_bits);
#if (PSM_VERNO_MAJOR >= 2)
	PSMX_SET_TAG(wire_tag2, wire_tag, 0);
#endif

	req_ctxt = &priv->nocomp_send_context;

#if (PSM_VERNO_MAJOR >= 2)
	rc = psm_mq_isend2(priv->domain->psm_mq, peer, 0, &wire_tag2,
			   buf, len, (void *)req_ctxt, &mq_req);
#else
	rc = psm_mq_isend(priv->domain->psm_mq, peer, 0, wire_tag,
			  buf, len, (void *)req_ctxt, &mq_req);
#endif

	return (rc != PSM_OK) ? psmx_errno(rc) : 0;
}
/*
 * Post a non-blocking PSM MQ send of buf[0..len) to the peer of con_info.
 *
 * The PSM request context is the con_info pointer OR'ed with nr, so the
 * completion side can recover both from a single pointer-sized value.
 * *req must be PSM_MQ_REQINVALID on entry (asserted); it receives the new
 * request handle.
 *
 * Returns 0 on success.  On failure the PSM error string is recorded via
 * pspsm_err(), a debug line is printed, and -EPIPE is returned.
 */
static inline int _pspsm_send_buf(pspsm_con_info_t *con_info, char *buf,
				  size_t len, uint64_t tag,
				  psm_mq_req_t *req, unsigned long nr)
{
	void *cookie = (void *)((uintptr_t)con_info | nr);
	psm_error_t rc;

	assert(*req == PSM_MQ_REQINVALID);

	rc = psm_mq_isend(pspsm_mq, con_info->epaddr,
			  /* flags */ 0,
			  tag, buf, len, cookie, req);
	if (rc == PSM_OK)
		return 0;

	pspsm_err(psm_error_get_string(rc));
	pspsm_dprint(1, "_pspsm_send_buf: %s", pspsm_err_str);
	return -EPIPE;
}
int psmx_am_process_rma(struct psmx_fid_domain *domain, struct psmx_am_request *req) { int err; psm_mq_req_t psm_req; if ((req->op & PSMX_AM_OP_MASK) == PSMX_AM_REQ_WRITE_LONG) { err = psm_mq_irecv(domain->psm_mq, (uint64_t)req->write.context, -1ULL, 0, (void *)req->write.addr, req->write.len, (void *)&req->fi_context, &psm_req); } else { err = psm_mq_isend(domain->psm_mq, (psm_epaddr_t)req->read.peer_addr, 0, (uint64_t)req->read.context, (void *)req->read.addr, req->read.len, (void *)&req->fi_context, &psm_req); } return psmx_errno(err); }
/*
 * Tagged-send fast path for endpoints whose fi_addr_t values are used
 * directly as PSM endpoint addresses.
 *
 * The caller's context is used as the PSM request context: its type is set to
 * PSMX_TSEND_CONTEXT, its user slot to buf and its endpoint slot to this
 * endpoint before the send is posted, and the PSM request handle is stored in
 * it once the post succeeds.  context is dereferenced unconditionally, so it
 * must not be NULL.  desc is not used.
 *
 * Returns 0 when the send was posted, otherwise the value of psmx_errno() for
 * the PSM error.
 */
ssize_t psmx_tagged_send_no_flag_av_map(struct fid_ep *ep, const void *buf,
					size_t len, void *desc,
					fi_addr_t dest_addr, uint64_t tag,
					void *context)
{
	struct psmx_fid_ep *priv = container_of(ep, struct psmx_fid_ep, ep);
	psm_epaddr_t peer = (psm_epaddr_t) dest_addr;
	uint64_t wire_tag = tag & (~priv->domain->reserved_tag_bits);
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t wire_tag2;
#endif
	struct fi_context *req_ctxt = context;
	psm_mq_req_t mq_req;
	int rc;

#if (PSM_VERNO_MAJOR >= 2)
	PSMX_SET_TAG(wire_tag2, wire_tag, 0);
#endif

	/* Stamp the caller's context so the completion path can identify it. */
	PSMX_CTXT_TYPE(req_ctxt) = PSMX_TSEND_CONTEXT;
	PSMX_CTXT_USER(req_ctxt) = (void *)buf;
	PSMX_CTXT_EP(req_ctxt) = priv;

#if (PSM_VERNO_MAJOR >= 2)
	rc = psm_mq_isend2(priv->domain->psm_mq, peer, 0, &wire_tag2,
			   buf, len, (void *)req_ctxt, &mq_req);
#else
	rc = psm_mq_isend(priv->domain->psm_mq, peer, 0, wire_tag,
			  buf, len, (void *)req_ctxt, &mq_req);
#endif
	if (rc != PSM_OK)
		return psmx_errno(rc);

	/* Only valid after a successful post. */
	PSMX_CTXT_REQ(req_ctxt) = mq_req;
	return 0;
}
//psm_mq_req_t non_blocking_send(const psm_mq_t mq, psm_epaddr_t dest_ep, const void *buf, uint32_t len, int context_id, int send_tag, const my_request_t *req) psm_mq_req_t non_blocking_send(const psm_mq_t mq, psm_epaddr_t dest_ep, const void *buf, uint32_t len, int context_id, int send_tag, psm_mq_req_t *req) { psm_mq_req_t req_mq; // Set up our send tag, assume that "my_rank" is global and represents // the rank of this process in the job uint64_t tag = 1; /* ( ((context_id & 0xffff) << 48) | */ /* ((my_rank & 0xffff) << 32) | */ /* ((send_tag & 0xffffffff)) ); */ psm_mq_isend(mq, dest_ep, 0, // no flags tag, buf, len, req, // this req is available in psm_mq_status_t when one // of the synchronization functions is called. &req_mq); return req_mq; }
/*
 * Tagged-send fast path for endpoints whose address vector is a table.
 *
 * dest_addr is an index into the endpoint's AV; an index at or beyond
 * av->last yields -FI_EINVAL.  The caller's context becomes the PSM request
 * context: it is stamped with PSMX_TSEND_CONTEXT, buf and the endpoint before
 * the send is posted, and receives the PSM request handle once the post
 * succeeds.  context is dereferenced unconditionally, so it must not be NULL.
 * desc is not used.
 *
 * Returns 0 when the send was posted, otherwise the value of psmx_errno() for
 * the PSM error.
 */
ssize_t psmx_tagged_send_no_flag_av_table(struct fid_ep *ep, const void *buf,
					  size_t len, void *desc,
					  fi_addr_t dest_addr, uint64_t tag,
					  void *context)
{
	struct psmx_fid_ep *priv = container_of(ep, struct psmx_fid_ep, ep);
	struct psmx_fid_av *table = priv->av;
	size_t slot = (size_t)dest_addr;
	struct fi_context *req_ctxt;
	psm_epaddr_t peer;
	psm_mq_req_t mq_req;
	uint64_t wire_tag;
	int rc;

	/* Bounds-check before touching the address table. */
	if (slot >= table->last)
		return -FI_EINVAL;
	peer = table->psm_epaddrs[slot];

	/* Strip the bits the provider keeps for itself. */
	wire_tag = tag & (~priv->domain->reserved_tag_bits);

	/* Stamp the caller's context so the completion path can identify it. */
	req_ctxt = context;
	PSMX_CTXT_TYPE(req_ctxt) = PSMX_TSEND_CONTEXT;
	PSMX_CTXT_USER(req_ctxt) = (void *)buf;
	PSMX_CTXT_EP(req_ctxt) = priv;

	rc = psm_mq_isend(priv->domain->psm_mq, peer, 0, wire_tag,
			  buf, len, (void *)req_ctxt, &mq_req);
	if (rc != PSM_OK)
		return psmx_errno(rc);

	/* Only valid after a successful post. */
	PSMX_CTXT_REQ(req_ctxt) = mq_req;
	return 0;
}
/*
 * Issue an RMA write of buf[0..len) to (addr, key) on the peer dest_addr.
 *
 * Behaviour, in order:
 *  - FI_TRIGGER: the operation is recorded in a psmx_trigger (with FI_TRIGGER
 *    cleared from the saved flags), attached to the counter named by the
 *    triggered context, and 0 is returned without sending anything.
 *  - dest_addr is resolved through the AV when the AV is of type FI_AV_TABLE;
 *    otherwise it is used directly and must be non-zero.
 *  - A write to this process's own endpoint id is delegated to
 *    psmx_rma_self().
 *  - FI_INJECT: the payload is copied into the same allocation as the request
 *    descriptor (directly after it), so the caller's buffer is not referenced
 *    after return.
 *  - When psmx_env.tagged_rma is set and the payload is larger than one
 *    chunk, a short AM request announces the write and the data follows as a
 *    tagged MQ send.  Otherwise the payload is sent as a series of AM
 *    requests of at most chunk_size bytes, the last one flagged PSMX_AM_EOM.
 *
 * Returns 0 on success, -EINVAL / -ENOMEM for argument and allocation
 * failures, or psmx_errno() of the PSM error when a PSM call fails.
 *
 * Ownership of req on failure: it is freed only when the very first PSM call
 * fails, i.e. when nothing has been handed to PSM yet.  After a partial hand
 * off it is intentionally left allocated, because earlier requests were
 * posted with PSM_AM_FLAG_ASYNC and (for FI_INJECT) their payload lives in
 * the same allocation.
 */
ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
		      void *desc, fi_addr_t dest_addr, uint64_t addr,
		      uint64_t key, void *context, uint64_t flags,
		      uint64_t data)
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	struct psmx_epaddr_context *epaddr_context;
	struct psmx_am_request *req;
	psm_amarg_t args[8];
	int nargs;
	int am_flags = PSM_AM_FLAG_ASYNC;
	int err;
	int chunk_size;
	int handed_off = 0;	/* set once any PSM call has accepted work */
	psm_mq_req_t psm_req;
	uint64_t psm_tag;
	size_t idx;

	if (flags & FI_TRIGGER) {
		struct psmx_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -ENOMEM;

		trigger->op = PSMX_TRIGGERED_WRITE;
		trigger->cntr = container_of(ctxt->threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = ctxt->threshold.threshold;
		trigger->write.ep = ep;
		trigger->write.buf = buf;
		trigger->write.len = len;
		trigger->write.desc = desc;
		trigger->write.dest_addr = dest_addr;
		trigger->write.addr = addr;
		trigger->write.key = key;
		trigger->write.context = context;
		/* Clear FI_TRIGGER so the deferred call takes the normal path. */
		trigger->write.flags = flags & ~FI_TRIGGER;
		trigger->write.data = data;

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);
	assert(ep_priv->domain);

	if (!buf)
		return -EINVAL;

	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = dest_addr;
		if (idx >= av->last)
			return -EINVAL;

		dest_addr = (fi_addr_t) av->psm_epaddrs[idx];
	} else if (!dest_addr) {
		return -EINVAL;
	}

	epaddr_context = psm_epaddr_getctxt((void *)dest_addr);
	if (epaddr_context->epid == ep_priv->domain->psm_epid)
		return psmx_rma_self(PSMX_AM_REQ_WRITE, ep_priv, (void *)buf,
				     len, desc, addr, key, context, flags,
				     data);

	if (flags & FI_INJECT) {
		/* One allocation: request descriptor followed by a private
		 * copy of the payload. */
		req = malloc(sizeof(*req) + len);
		if (!req)
			return -ENOMEM;

		memset(req, 0, sizeof(*req));
		memcpy(req + 1, buf, len);
		buf = req + 1;

		PSMX_CTXT_TYPE(&req->fi_context) = PSMX_INJECT_WRITE_CONTEXT;
		req->no_event = 1;
	} else {
		req = calloc(1, sizeof(*req));
		if (!req)
			return -ENOMEM;

		if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) {
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_NOCOMP_WRITE_CONTEXT;
			req->no_event = 1;
		} else {
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_WRITE_CONTEXT;
		}
	}

	req->op = PSMX_AM_REQ_WRITE;
	req->write.buf = (void *)buf;
	req->write.len = len;
	req->write.addr = addr;	/* needed? */
	req->write.key = key;	/* needed? */
	req->write.context = context;
	req->ep = ep_priv;
	PSMX_CTXT_USER(&req->fi_context) = context;

	chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_request_short);

	if (psmx_env.tagged_rma && len > chunk_size) {
		/* Announce the write with a payload-free AM request, then
		 * move the data with a tagged MQ send. */
		psm_tag = PSMX_RMA_BIT | ep_priv->domain->psm_epid;
		args[0].u32w0 = PSMX_AM_REQ_WRITE_LONG;
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		args[4].u64 = psm_tag;
		nargs = 5;
		if (flags & FI_REMOTE_CQ_DATA) {
			args[5].u64 = data;
			args[0].u32w0 |= PSMX_AM_DATA;
			nargs++;
		}

		err = psm_am_request_short((psm_epaddr_t) dest_addr,
					   PSMX_AM_RMA_HANDLER, args, nargs,
					   NULL, 0,
					   am_flags | PSM_AM_FLAG_NOREPLY,
					   NULL, NULL);
		if (err != PSM_OK)
			goto fail;
		handed_off = 1;

		err = psm_mq_isend(ep_priv->domain->psm_mq,
				   (psm_epaddr_t) dest_addr, 0, psm_tag,
				   buf, len, (void *)&req->fi_context,
				   &psm_req);
		if (err != PSM_OK)
			goto fail;

		return 0;
	}

	nargs = 4;
	while (len > chunk_size) {
		args[0].u32w0 = PSMX_AM_REQ_WRITE;
		args[0].u32w1 = chunk_size;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;

		err = psm_am_request_short((psm_epaddr_t) dest_addr,
					   PSMX_AM_RMA_HANDLER, args, nargs,
					   (void *)buf, chunk_size,
					   am_flags | PSM_AM_FLAG_NOREPLY,
					   NULL, NULL);
		if (err != PSM_OK)
			goto fail;
		handed_off = 1;

		buf = (const char *)buf + chunk_size;
		addr += chunk_size;
		len -= chunk_size;
	}

	/* Final (or only) chunk carries the end-of-message marker and, when
	 * requested, the remote CQ data as an extra argument. */
	args[0].u32w0 = PSMX_AM_REQ_WRITE | PSMX_AM_EOM;
	args[0].u32w1 = len;
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[2].u64 = addr;
	args[3].u64 = key;
	if (flags & FI_REMOTE_CQ_DATA) {
		args[4].u64 = data;
		args[0].u32w0 |= PSMX_AM_DATA;
		nargs++;
	}

	err = psm_am_request_short((psm_epaddr_t) dest_addr,
				   PSMX_AM_RMA_HANDLER, args, nargs,
				   (void *)buf, len, am_flags, NULL, NULL);
	if (err != PSM_OK)
		goto fail;

	return 0;

fail:
	/* Reclaim req only if PSM never accepted anything that refers to it
	 * (or to the inject payload stored behind it). */
	if (!handed_off)
		free(req);
	return psmx_errno(err);
}
/*
 * General tagged-send entry point: handles triggered, injected and regular
 * non-blocking sends, for both table-type and directly-addressed AVs.
 *
 * Returns 0 on success; -FI_ENOMEM, -FI_EINVAL or -FI_EMSGSIZE for the local
 * failures visible below; otherwise psmx_errno() of the PSM error.
 *
 * NOTE(review): the #endif right after the signature closes a conditional
 * that opens before this chunk.  The body references `data` under
 * PSM_VERNO_MAJOR >= 2, so presumably the hidden branch is an alternative
 * signature that adds a `data` parameter -- confirm against the full file.
 */
ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len, void *desc, fi_addr_t dest_addr, uint64_t tag, void *context, uint64_t flags)
#endif
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	psm_epaddr_t psm_epaddr;
	psm_mq_req_t psm_req;
	uint64_t psm_tag;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t psm_tag2;
#endif
	struct fi_context *fi_context;
	int err;
	size_t idx;
	int no_completion = 0;
	struct psmx_cq_event *event;

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);

	/* Triggered operation: record every argument and attach the record to
	 * the threshold counter named by the triggered context; nothing is
	 * sent from here. */
	if (flags & FI_TRIGGER) {
		struct psmx_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX_TRIGGERED_TSEND;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr, struct psmx_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->tsend.ep = ep;
		trigger->tsend.buf = buf;
		trigger->tsend.len = len;
		trigger->tsend.desc = desc;
		trigger->tsend.dest_addr = dest_addr;
		trigger->tsend.tag = tag;
		trigger->tsend.context = context;
		/* FI_TRIGGER is cleared in the saved flags. */
		trigger->tsend.flags = flags & ~FI_TRIGGER;
#if (PSM_VERNO_MAJOR >= 2)
		trigger->tsend.data = data;
#endif
		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	/* Reserved bits in the user tag only produce a warning; they are
	 * masked off below rather than rejected. */
	if (tag & ep_priv->domain->reserved_tag_bits) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA, "using reserved tag bits." "tag=%lx. reserved_bits=%lx.\n", tag, ep_priv->domain->reserved_tag_bits);
	}

	/* Resolve the destination: table AVs index into psm_epaddrs, any other
	 * configuration uses dest_addr itself as the PSM endpoint address. */
	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = (size_t)dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;
		psm_epaddr = av->psm_epaddrs[idx];
	} else {
		psm_epaddr = (psm_epaddr_t) dest_addr;
	}

	psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);
#if (PSM_VERNO_MAJOR >= 2)
	PSMX_SET_TAG(psm_tag2, psm_tag, data);
#endif

	/* Completion is suppressed either explicitly via PSMX_NO_COMPLETION or
	 * when the endpoint uses selective completion and the caller did not
	 * pass FI_COMPLETION. */
	if ((flags & PSMX_NO_COMPLETION) || (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)))
		no_completion = 1;

	/* Inject path: uses the blocking psm_mq_send/psm_mq_send2 call, so buf
	 * is not referenced by a pending request afterwards; the send counter
	 * and CQ event are produced here directly. */
	if (flags & FI_INJECT) {
		if (len > PSMX_INJECT_SIZE)
			return -FI_EMSGSIZE;
#if (PSM_VERNO_MAJOR >= 2)
		err = psm_mq_send2(ep_priv->domain->psm_mq, psm_epaddr, 0, &psm_tag2, buf, len);
#else
		err = psm_mq_send(ep_priv->domain->psm_mq, psm_epaddr, 0, psm_tag, buf, len);
#endif
		if (err != PSM_OK)
			return psmx_errno(err);

		if (ep_priv->send_cntr)
			psmx_cntr_inc(ep_priv->send_cntr);

		if (ep_priv->send_cq && !no_completion) {
			event = psmx_cq_create_event( ep_priv->send_cq, context, (void *)buf, flags, len,
#if (PSM_VERNO_MAJOR >= 2)
					(uint64_t) data, psm_tag,
#else
					0 /* data */, psm_tag,
#endif
					0 /* olen */, 0 /* err */);
			/* NOTE(review): on allocation failure -FI_ENOMEM is
			 * returned although the message has already been sent
			 * and the counter incremented. */
			if (event)
				psmx_cq_enqueue_event(ep_priv->send_cq, event);
			else
				return -FI_ENOMEM;
		}
		return 0;
	}

	/* Pick the PSM request context: the endpoint's shared
	 * nocomp_send_context when no completion is wanted and the caller gave
	 * no context; otherwise the caller's context, which is mandatory. */
	if (no_completion && !context) {
		fi_context = &ep_priv->nocomp_send_context;
	} else {
		if (!context)
			return -FI_EINVAL;
		fi_context = context;
		PSMX_CTXT_TYPE(fi_context) = PSMX_TSEND_CONTEXT;
		PSMX_CTXT_USER(fi_context) = (void *)buf;
		PSMX_CTXT_EP(fi_context) = ep_priv;
	}

#if (PSM_VERNO_MAJOR >= 2)
	err = psm_mq_isend2(ep_priv->domain->psm_mq, psm_epaddr, 0, &psm_tag2, buf, len, (void*)fi_context, &psm_req);
#else
	err = psm_mq_isend(ep_priv->domain->psm_mq, psm_epaddr, 0, psm_tag, buf, len, (void*)fi_context, &psm_req);
#endif
	if (err != PSM_OK)
		return psmx_errno(err);

	/* The request handle is stored only in a caller-owned context, never
	 * in the shared nocomp_send_context. */
	if (fi_context == context)
		PSMX_CTXT_REQ(fi_context) = psm_req;

	return 0;
}
/*
 * Issue an RMA write of buf[0..len) to (addr, key) on the peer dest_addr.
 *
 * Behaviour, in order:
 *  - FI_TRIGGER: the operation is recorded in a psmx_trigger (with FI_TRIGGER
 *    cleared from the saved flags), attached to the counter named by the
 *    triggered context, and 0 is returned without sending anything.
 *  - dest_addr is resolved through the AV when the AV is of type FI_AV_TABLE;
 *    otherwise it is used directly and must be non-zero.
 *  - A write to this process's own endpoint id is delegated to
 *    psmx_rma_self().
 *  - The completion event is suppressed when PSMX_NO_COMPLETION is set, or
 *    when the endpoint uses selective completion and FI_COMPLETION is absent.
 *  - FI_INJECT: limited to PSMX_INJECT_SIZE bytes; the payload is copied into
 *    the same allocation as the request descriptor (directly after it).
 *  - When psmx_env.tagged_rma is set and the payload is larger than one
 *    chunk, a short AM request announces the write and the data follows as a
 *    tagged MQ send.  Otherwise the payload is sent as a series of AM
 *    requests of at most chunk_size bytes, the last one flagged PSMX_AM_EOM.
 *
 * Returns 0 on success, -FI_EINVAL / -FI_ENOMEM / -FI_EMSGSIZE for local
 * failures, or psmx_errno() of the PSM error when a PSM call fails.
 *
 * Ownership of req on failure: it is freed only when the very first PSM call
 * fails, i.e. when nothing has been handed to PSM yet.  After a partial hand
 * off it is intentionally left allocated, because earlier requests may have
 * been posted with PSM_AM_FLAG_ASYNC, may still be answered by the peer, and
 * (for FI_INJECT) carry payload stored in the same allocation.
 */
ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len,
		    void *desc, fi_addr_t dest_addr, uint64_t addr,
		    uint64_t key, void *context, uint64_t flags,
		    uint64_t data)
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	struct psmx_epaddr_context *epaddr_context;
	struct psmx_am_request *req;
	psm_amarg_t args[8];
	int nargs;
	int am_flags = PSM_AM_FLAG_ASYNC;
	int chunk_size;
	int err;
	int handed_off = 0;	/* set once any PSM call has accepted work */
	psm_mq_req_t psm_req;
	uint64_t psm_tag;
	size_t idx;
	void *psm_context;
	int no_event;

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct psmx_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX_TRIGGERED_WRITE;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->write.ep = ep;
		trigger->write.buf = buf;
		trigger->write.len = len;
		trigger->write.desc = desc;
		trigger->write.dest_addr = dest_addr;
		trigger->write.addr = addr;
		trigger->write.key = key;
		trigger->write.context = context;
		/* Clear FI_TRIGGER so the deferred call takes the normal path. */
		trigger->write.flags = flags & ~FI_TRIGGER;
		trigger->write.data = data;

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (!buf)
		return -FI_EINVAL;

	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		dest_addr = (fi_addr_t) av->psm_epaddrs[idx];
	} else if (!dest_addr) {
		return -FI_EINVAL;
	}

	epaddr_context = psm_epaddr_getctxt((void *)dest_addr);
	if (epaddr_context->epid == ep_priv->domain->psm_epid)
		return psmx_rma_self(PSMX_AM_REQ_WRITE, ep_priv, (void *)buf,
				     len, desc, addr, key, context, flags,
				     data);

	no_event = (flags & PSMX_NO_COMPLETION) ||
		   (ep_priv->send_selective_completion && !(flags & FI_COMPLETION));

	if (flags & FI_INJECT) {
		if (len > PSMX_INJECT_SIZE)
			return -FI_EMSGSIZE;

		/* One allocation: request descriptor followed by a private
		 * copy of the payload. */
		req = malloc(sizeof(*req) + len);
		if (!req)
			return -FI_ENOMEM;

		memset(req, 0, sizeof(*req));
		memcpy(req + 1, buf, len);
		buf = req + 1;
		/* NOTE(review): the context type is left at its zeroed value
		 * on this path -- confirm that is the intended type for
		 * injected writes. */
	} else {
		req = calloc(1, sizeof(*req));
		if (!req)
			return -FI_ENOMEM;

		PSMX_CTXT_TYPE(&req->fi_context) = no_event ?
						   PSMX_NOCOMP_WRITE_CONTEXT :
						   PSMX_WRITE_CONTEXT;
	}

	req->no_event = no_event;
	req->op = PSMX_AM_REQ_WRITE;
	req->write.buf = (void *)buf;
	req->write.len = len;
	req->write.addr = addr;	/* needed? */
	req->write.key = key;	/* needed? */
	req->write.context = context;
	req->ep = ep_priv;
	req->cq_flags = FI_WRITE | FI_RMA;
	PSMX_CTXT_USER(&req->fi_context) = context;
	PSMX_CTXT_EP(&req->fi_context) = ep_priv;

	chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_request_short);

	if (psmx_env.tagged_rma && len > chunk_size) {
		void *payload = NULL;
		int payload_len = 0;

		psm_tag = PSMX_RMA_BIT | ep_priv->domain->psm_epid;
		args[0].u32w0 = PSMX_AM_REQ_WRITE_LONG;
		args[0].u32w1 = len;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;
		args[4].u64 = psm_tag;
		nargs = 5;
		if (flags & FI_REMOTE_CQ_DATA) {
			args[0].u32w0 |= PSMX_AM_DATA;
			payload = &data;
			payload_len = sizeof(data);
			/* The payload points at a by-value parameter, so the
			 * async flag is dropped for this request. */
			am_flags = 0;
		}

		if (flags & FI_DELIVERY_COMPLETE) {
			args[0].u32w0 |= PSMX_AM_FORCE_ACK;
			psm_context = NULL;
		} else {
			psm_context = (void *)&req->fi_context;
		}

		/* NOTE: if nargs is greater than 5, the following psm_mq_isend
		 * would hang if the destination is on the same node (i.e. going
		 * through the shared memory path). As the result, the immediate
		 * data is sent as payload instead of args[5].
		 */
		err = psm_am_request_short((psm_epaddr_t) dest_addr,
					   PSMX_AM_RMA_HANDLER, args, nargs,
					   payload, payload_len, am_flags,
					   NULL, NULL);
		if (err != PSM_OK)
			goto fail;
		handed_off = 1;

		err = psm_mq_isend(ep_priv->domain->psm_mq,
				   (psm_epaddr_t) dest_addr, 0, psm_tag,
				   buf, len, psm_context, &psm_req);
		if (err != PSM_OK)
			goto fail;

		return 0;
	}

	nargs = 4;
	while (len > chunk_size) {
		args[0].u32w0 = PSMX_AM_REQ_WRITE;
		args[0].u32w1 = chunk_size;
		args[1].u64 = (uint64_t)(uintptr_t)req;
		args[2].u64 = addr;
		args[3].u64 = key;

		err = psm_am_request_short((psm_epaddr_t) dest_addr,
					   PSMX_AM_RMA_HANDLER, args, nargs,
					   (void *)buf, chunk_size, am_flags,
					   NULL, NULL);
		if (err != PSM_OK)
			goto fail;
		handed_off = 1;

		buf = (const char *)buf + chunk_size;
		addr += chunk_size;
		len -= chunk_size;
	}

	/* Final (or only) chunk carries the end-of-message marker and, when
	 * requested, the remote CQ data as an extra argument. */
	args[0].u32w0 = PSMX_AM_REQ_WRITE | PSMX_AM_EOM;
	args[0].u32w1 = len;
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[2].u64 = addr;
	args[3].u64 = key;
	if (flags & FI_REMOTE_CQ_DATA) {
		args[4].u64 = data;
		args[0].u32w0 |= PSMX_AM_DATA;
		nargs++;
	}

	err = psm_am_request_short((psm_epaddr_t) dest_addr,
				   PSMX_AM_RMA_HANDLER, args, nargs,
				   (void *)buf, len, am_flags, NULL, NULL);
	if (err != PSM_OK)
		goto fail;

	return 0;

fail:
	/* Reclaim req only if PSM never accepted anything that refers to it
	 * (or to the inject payload stored behind it). */
	if (!handed_off)
		free(req);
	return psmx_errno(err);
}