psm_error_t __psm_am_reply_short(psm_am_token_t token, psm_handler_t handler,
				 psm_amarg_t *args, int nargs, void *src,
				 size_t len, int flags,
				 psm_am_completion_fn_t completion_fn,
				 void *completion_ctxt)
{
	psm_error_t err;
	struct psmi_am_token *tok;
	psm_epaddr_t epaddr;
	ptl_ctl_t *ptlc;

	PSMI_ASSERT_INITIALIZED();
	psmi_assert_always(token != NULL);
	psmi_assert(handler >= 0 && handler < psmi_am_parameters.max_handlers);
	psmi_assert(nargs >= 0 && nargs <= psmi_am_parameters.max_nargs);
	psmi_assert(nargs > 0 ? args != NULL : 1);
	psmi_assert(len >= 0 && len <= psmi_am_parameters.max_reply_short);
	psmi_assert(len > 0 ? src != NULL : 1);

	tok = (struct psmi_am_token *)token;
	epaddr = tok->epaddr_from;
	ptlc = epaddr->ptlctl;

	/* No locking here since we are already within handler context and
	 * already locked */
	err = ptlc->am_short_reply(token, handler, args, nargs, src, len,
				   flags, completion_fn, completion_ctxt);
	return err;
}
/*
 * Force to remove a tid; check the invalidation event afterwards.
 */
static psm2_error_t
ips_tidcache_remove(struct ips_tid *tidc, uint32_t tidcnt)
{
	cl_qmap_t *p_map = &tidc->tid_cachemap;
	uint32_t idx;
	psm2_error_t err;

	/*
	 * call driver to free the tids.
	 */
	if (hfi_free_tid(tidc->context->ctrl,
			 (uint64_t) (uintptr_t) tidc->tid_array, tidcnt) < 0) {
		/* If failed to unpin pages, it's fatal error */
		err = psmi_handle_error(tidc->context->ep,
					PSM2_EP_DEVICE_FAILURE,
					"Failed to tid free %d tids", tidcnt);
		return err;
	}

	while (tidcnt) {
		tidcnt--;
		idx = 2*IPS_TIDINFO_GET_TID(tidc->tid_array[tidcnt]) +
			IPS_TIDINFO_GET_TIDCTRL(tidc->tid_array[tidcnt]);

		/*
		 * sanity check.
		 */
		psmi_assert(idx != 0);
		psmi_assert(idx <= tidc->tid_ctrl->tid_num_max);
		psmi_assert(INVALIDATE(idx) == 0);
		psmi_assert(REFCNT(idx) == 0);

		/*
		 * mark the tid invalidated.
		 */
		INVALIDATE(idx) = 1;

		/*
		 * remove the tid from the RB tree.
		 */
		IDLE_REMOVE(idx);
		ips_cl_qmap_remove_item(p_map, &p_map->root[idx]);
	}

	/*
	 * Because the freed tid is not from the invalidation list,
	 * it is possible that the kernel just invalidated the tid,
	 * so we need to check and process the invalidation
	 * before we can re-use this tid. The reverse order
	 * would wrongly invalidate this tid again.
	 */
	if ((*tidc->invalidation_event) & HFI1_EVENT_TID_MMU_NOTIFY) {
		err = ips_tidcache_invalidation(tidc);
		if (err)
			return err;
	}

	return PSM2_OK;
}
int __recvpath
psmi_mq_handle_data(psm_mq_req_t req, psm_epaddr_t epaddr,
		    uint32_t egrid, uint32_t offset,
		    const void *buf, uint32_t nbytes)
{
	psm_mq_t mq;
	int rc;

	if (req == NULL)
		goto no_req;

	mq = req->mq;
	if (req->state == MQ_STATE_MATCHED)
		rc = MQ_RET_MATCH_OK;
	else {
		psmi_assert(req->state == MQ_STATE_UNEXP);
		rc = MQ_RET_UNEXP_OK;
	}
	psmi_assert(req->egrid.egr_data == egrid);

	psmi_mq_req_copy(req, epaddr, offset, buf, nbytes);

	if (req->send_msgoff == req->send_msglen) {
		if (req->type & MQE_TYPE_EGRLONG) {
			STAILQ_REMOVE(&epaddr->mctxt_master->egrlong,
				      req, psm_mq_req, nextq);
		}
		if (req->state == MQ_STATE_MATCHED) {
			req->state = MQ_STATE_COMPLETE;
			mq_qq_append(&mq->completed_q, req);
		} else {	/* MQ_STATE_UNEXP */
			req->state = MQ_STATE_COMPLETE;
		}
		_IPATH_VDBG("epaddr=%s completed %d byte send, state=%d\n",
			    psmi_epaddr_get_name(epaddr->epid),
			    (int)req->send_msglen, req->state);
	}

	return rc;

no_req:
	mq = epaddr->ep->mq;
	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
	psmi_assert(req != NULL);

	req->egrid.egr_data = egrid;
	req->recv_msgoff = offset;
	req->recv_msglen = nbytes;
	req->buf = psmi_mq_sysbuf_alloc(mq, nbytes);
	psmi_mq_mtucpy(req->buf, buf, nbytes);

	STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrdata, req, nextq);

	return MQ_RET_UNEXP_OK;
}
void
psmi_am_mq_handler(void *toki, psm2_amarg_t *args, int narg, void *buf,
		   size_t len)
{
	amsh_am_token_t *tok = (amsh_am_token_t *) toki;
	psm2_mq_req_t req;
	psm2_mq_tag_t tag;
	int rc;
	uint32_t opcode = args[0].u32w0;
	uint32_t msglen = opcode <= MQ_MSG_SHORT ? len : args[0].u32w1;

	tag.tag[0] = args[1].u32w1;
	tag.tag[1] = args[1].u32w0;
	tag.tag[2] = args[2].u32w1;
	psmi_assert(toki != NULL);
	_HFI_VDBG("mq=%p opcode=%d, len=%d, msglen=%d\n",
		  tok->mq, opcode, (int)len, msglen);

	switch (opcode) {
	case MQ_MSG_TINY:
	case MQ_MSG_SHORT:
	case MQ_MSG_EAGER:
		rc = psmi_mq_handle_envelope(tok->mq, tok->tok.epaddr_incoming,
					     &tag, msglen, 0, buf,
					     (uint32_t) len, 1, opcode, &req);

		/* for eager matching */
		req->ptl_req_ptr = (void *)tok->tok.epaddr_incoming;
		req->msg_seqnum = 0;	/* using seqnum 0 */
		break;
	default:{
			void *sreq = (void *)(uintptr_t) args[3].u64w0;
			uintptr_t sbuf = (uintptr_t) args[4].u64w0;

			psmi_assert(narg == 5);
			psmi_assert_always(opcode == MQ_MSG_LONGRTS);
			rc = psmi_mq_handle_rts(tok->mq,
						tok->tok.epaddr_incoming,
						&tag, msglen, NULL, 0, 1,
						ptl_handle_rtsmatch, &req);

			req->rts_peer = tok->tok.epaddr_incoming;
			req->ptl_req_ptr = sreq;
			req->rts_sbuf = sbuf;

			if (rc == MQ_RET_MATCH_OK)
				/* we are in handler context, issue a reply */
				ptl_handle_rtsmatch_request(req, 1, tok);
			/* else will be called later */
			break;
		}
	}
	return;
}
psm2_error_t
ips_tidcache_release(struct ips_tid *tidc,
		     uint32_t *tid_array, uint32_t tidcnt)
{
	cl_qmap_t *p_map = &tidc->tid_cachemap;
	uint32_t i, j, idx;
	psm2_error_t err;

	psmi_assert(tidcnt > 0);

	j = 0;
	for (i = 0; i < tidcnt; i++) {
		/*
		 * Driver only returns tidctrl=1 or tidctrl=2.
		 */
		idx = 2*IPS_TIDINFO_GET_TID(tid_array[i]) +
			IPS_TIDINFO_GET_TIDCTRL(tid_array[i]);
		psmi_assert(idx != 0);
		psmi_assert(idx <= tidc->tid_ctrl->tid_num_max);
		psmi_assert(REFCNT(idx) != 0);

		REFCNT(idx)--;
		if (REFCNT(idx) == 0) {
			if (INVALIDATE(idx) != 0) {
				ips_cl_qmap_remove_item(p_map,
							&p_map->root[idx]);

				tidc->tid_array[j] = tid_array[i];
				j++;
			} else {
				IDLE_INSERT(idx);
			}
		}
	}

	if (j > 0) {
		/*
		 * call driver to free the tids.
		 */
		if (hfi_free_tid(tidc->context->ctrl,
				 (uint64_t) (uintptr_t) tidc->tid_array,
				 j) < 0) {
			/* If failed to unpin pages, it's fatal error */
			err = psmi_handle_error(tidc->context->ep,
						PSM2_EP_DEVICE_FAILURE,
						"Failed to tid free %d tids",
						j);
			return err;
		}
	}

	return PSM2_OK;
}
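/*
 * Illustrative sketch, not part of the original source: the cache slot
 * index used above is idx = 2*tid + tidctrl.  Because the driver only
 * reports tidctrl = 1 or tidctrl = 2, the index is never 0 and the two
 * half-entries of a tid pair land in distinct, non-colliding slots.
 * The helper name below is hypothetical.
 */
static inline unsigned tid_cache_slot(unsigned tid, unsigned tidctrl)
{
	/* tidctrl is 1 or 2, so slots for tid N are 2N+1 and 2N+2 */
	return 2u * tid + tidctrl;
}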
psm2_error_t
ips_tid_acquire(struct ips_tid *tidc,
		const void *buf, uint32_t *length,
		uint32_t *tid_array, uint32_t *tidcnt)
{
	struct ips_tid_ctrl *ctrl = tidc->tid_ctrl;
	psm2_error_t err = PSM2_OK;
	int rc;

	psmi_assert(((uintptr_t) buf & 0xFFF) == 0);
	psmi_assert(((*length) & 0xFFF) == 0);

	if (tidc->context->tid_ctrl)
		pthread_spin_lock(&ctrl->tid_ctrl_lock);

	if (!ctrl->tid_num_avail) {
		err = PSM2_EP_NO_RESOURCES;
		goto fail;
	}

	/* Clip length if it exceeds the worst-case tid allocation,
	   where each entry in the tid array can accommodate only
	   one page. */
	if (*length > 4096*tidc->tid_ctrl->tid_num_max) {
		*length = 4096*tidc->tid_ctrl->tid_num_max;
	}

	rc = hfi_update_tid(tidc->context->ctrl,
			    (uint64_t) (uintptr_t) buf, length,
			    (uint64_t) (uintptr_t) tid_array, tidcnt);
	if (rc < 0) {
		/* Unable to pin pages? retry later */
		err = PSM2_EP_DEVICE_FAILURE;
		goto fail;
	}

	psmi_assert_always((*tidcnt) > 0);
	psmi_assert(ctrl->tid_num_avail >= (*tidcnt));
	ctrl->tid_num_avail -= (*tidcnt);
	tidc->tid_num_total += (*tidcnt);
	tidc->tid_num_inuse += (*tidcnt);

fail:
	if (tidc->context->tid_ctrl)
		pthread_spin_unlock(&ctrl->tid_ctrl_lock);

	return err;
}
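/*
 * Worked example of the clip above, using hypothetical numbers (2048
 * entries, worst case of one 4 KB page per entry): a 16 MB request would
 * be clipped to 2048 * 4096 = 8 MB before calling hfi_update_tid().
 * This block is an illustration only, not part of the original source.
 */
#if 0	/* illustration */
	uint32_t tid_num_max = 2048;		/* assumed value */
	uint32_t want = 16u << 20;		/* 16 MB request */

	if (want > 4096 * tid_num_max)
		want = 4096 * tid_num_max;	/* clipped to 8 MB */
#endif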
int __recvpath
psmi_mq_handle_data(psm_mq_req_t req, psm_epaddr_t epaddr,
		    const void *buf, uint32_t nbytes)
{
	psm_mq_t mq = req->mq;
	int rc;

	if (req->state == MQ_STATE_MATCHED)
		rc = MQ_RET_MATCH_OK;
	else {
		psmi_assert(req->state == MQ_STATE_UNEXP);
		rc = MQ_RET_UNEXP_OK;
	}

	psmi_mq_req_copy(req, epaddr, buf, nbytes);

	if (req->send_msgoff == req->send_msglen) {
		if (req->type & MQE_TYPE_EGRLONG) {
			int flowid = req->egrid.egr_flowid;

			psmi_assert(STAILQ_FIRST(&epaddr->egrlong[flowid]) ==
				    req);
			STAILQ_REMOVE_HEAD(&epaddr->egrlong[flowid], nextq);
		}
		/* Whatever is left over in the posted message should now be
		 * marked as undefined.
		 * XXX Sends not supported yet.
		 */
#if 0
#ifdef PSM_VALGRIND
		if (req->send_msglen < req->buf_len)
			VALGRIND_MAKE_MEM_UNDEFINED(
				(void *)((uintptr_t) req->buf +
					 req->send_msglen),
				req->buf_len - req->send_msglen);
#endif
#endif
		if (req->state == MQ_STATE_MATCHED) {
			req->state = MQ_STATE_COMPLETE;
			mq_qq_append(&mq->completed_q, req);
		} else {	/* MQ_STATE_UNEXP */
			req->state = MQ_STATE_COMPLETE;
		}
		_IPATH_VDBG("epaddr=%s completed %d byte send, state=%d\n",
			    psmi_epaddr_get_name(epaddr->epid),
			    (int)req->send_msglen, req->state);
	}

	return rc;
}
/*
 * Call driver to free all cached tids.
 */
psm2_error_t ips_tidcache_cleanup(struct ips_tid *tidc)
{
	cl_qmap_t *p_map = &tidc->tid_cachemap;
	psm2_error_t err;
	int i, j;

	j = 0;
	for (i = 1; i <= tidc->tid_ctrl->tid_num_max; i++) {
		psmi_assert(REFCNT(i) == 0);
		if (INVALIDATE(i) == 0) {
			tidc->tid_array[j++] =
				p_map->root[i].payload.tidinfo;
		}
	}

	if (j > 0) {
		/*
		 * call driver to free the tids.
		 */
		if (hfi_free_tid(tidc->context->ctrl,
				 (uint64_t) (uintptr_t) tidc->tid_array,
				 j) < 0) {
			/* If failed to unpin pages, it's fatal error */
			err = psmi_handle_error(tidc->context->ep,
						PSM2_EP_DEVICE_FAILURE,
						"Failed to tid free %d tids",
						j);
			return err;
		}
	}

	psmi_free(tidc->tid_array);
	psmi_free(tidc->tid_cachemap.root);

	return PSM2_OK;
}
/*
 * Note that the caller is responsible for making sure that NIDLE is
 * non-zero before calling ips_tidcache_evict.  If NIDLE is 0 at the time
 * of the call, ips_tidcache_evict is unstable.
 */
uint64_t
ips_tidcache_evict(struct ips_tid *tidc, uint64_t length)
{
	cl_qmap_t *p_map = &tidc->tid_cachemap;
	uint32_t idx = IHEAD, tidcnt = 0, tidlen = 0;

	/*
	 * try to free the required pages from the idle-queue tids.
	 */
	do {
		idx = IPREV(idx);
		psmi_assert(idx != 0);
		tidc->tid_array[tidcnt] = p_map->root[idx].payload.tidinfo;
		tidcnt++;

		tidlen += IPS_TIDINFO_GET_LENGTH
			(p_map->root[idx].payload.tidinfo) << 12;
	} while (tidcnt < NIDLE && tidlen < length);

	/*
	 * free the selected tids on successfully finding some.
	 */
	if (tidcnt > 0 && ips_tidcache_remove(tidc, tidcnt))
		return 0;

	return tidlen;
}
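/*
 * Hypothetical caller-side sketch, not from the original source, showing
 * the guard the note above asks for: the evictor is only entered when the
 * idle queue is non-empty, and the caller decides what to do if fewer
 * bytes than requested could be evicted.  NIDLE is the same accessor used
 * in the function above; bytes_needed is a made-up variable name.
 */
#if 0	/* illustration */
	uint64_t evicted = 0;

	if (NIDLE != 0)
		evicted = ips_tidcache_evict(tidc, bytes_needed);
	if (evicted < bytes_needed) {
		/* not enough idle entries could be freed; retry later */
	}
#endif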
int __recvpath
psmi_mq_handle_rts_outoforder(psm_mq_t mq, uint64_t tag,
			      uintptr_t send_buf, uint32_t send_msglen,
			      psm_epaddr_t peer, uint16_t msg_seqnum,
			      mq_rts_callback_fn_t cb, psm_mq_req_t *req_o)
{
	psm_mq_req_t req;

	PSMI_PLOCK_ASSERT();

	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
	psmi_assert(req != NULL);

	/* We don't know recv_msglen yet but we set it here for
	 * mq_iprobe */
	req->send_msglen = req->recv_msglen = send_msglen;
	req->state = MQ_STATE_UNEXP_RV;
	req->tag = tag;
	req->rts_callback = cb;
	req->recv_msgoff = 0;
	req->send_msgoff = 0;
	req->rts_peer = peer;
	req->rts_sbuf = send_buf;
	req->msg_seqnum = msg_seqnum;

	mq_sq_append(&peer->mctxt_master->outoforder_q, req);
	peer->mctxt_master->outoforder_c++;

	*req_o = req;	/* no match, will callback */

	_IPATH_VDBG("from=%s match=%s (req=%p) mqtag=%" PRIx64 " recvlen=%d "
		    "sendlen=%d errcode=%d\n",
		    psmi_epaddr_get_name(peer->epid), "NO", req, req->tag,
		    req->recv_msglen, req->send_msglen, req->error_code);

	return MQ_RET_UNEXP_OK;
}
static psm2_error_t
ptl_handle_rtsmatch(psm2_mq_req_t req, int was_posted)
{
	/* was_posted == 0 allows us to assume that we're not running this
	 * callback within am handler context (i.e. we can poll) */
	psmi_assert(was_posted == 0);
	return ptl_handle_rtsmatch_request(req, 0, NULL);
}
void
psmi_am_mq_handler_rtsdone(void *toki, psm2_amarg_t *args, int narg, void *buf,
			   size_t len)
{
	psm2_mq_req_t rreq = (psm2_mq_req_t) (uintptr_t) args[0].u64w0;

	psmi_assert(narg == 1);
	_HFI_VDBG("[rndv][recv] req=%p dest=%p len=%d\n",
		  rreq, rreq->buf, rreq->recv_msglen);
	psmi_mq_handle_rts_complete(rreq);
}
psm_error_t __psm_am_request_short(psm_epaddr_t epaddr, psm_handler_t handler,
				   psm_amarg_t *args, int nargs, void *src,
				   size_t len, int flags,
				   psm_am_completion_fn_t completion_fn,
				   void *completion_ctxt)
{
	psm_error_t err;
	ptl_ctl_t *ptlc = epaddr->ptlctl;

	PSMI_ASSERT_INITIALIZED();
	psmi_assert(epaddr != NULL);
	psmi_assert(handler >= 0 && handler < psmi_am_parameters.max_handlers);
	psmi_assert(nargs >= 0 && nargs <= psmi_am_parameters.max_nargs);
	psmi_assert(nargs > 0 ? args != NULL : 1);
	psmi_assert(len >= 0 && len <= psmi_am_parameters.max_request_short);
	psmi_assert(len > 0 ? src != NULL : 1);

	PSMI_PLOCK();
	err = ptlc->am_short_request(epaddr, handler, args, nargs, src, len,
				     flags, completion_fn, completion_ctxt);
	PSMI_PUNLOCK();

	return err;
}
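/*
 * Hypothetical application-side sketch of the public psm_am_request_short
 * entry point that the function above typically backs (via the library's
 * usual symbol aliasing).  handler_idx is assumed to come from a prior
 * psm_am_register_handlers() call; the request carries one 64-bit
 * argument, no payload, no flags, and no completion callback.  This is an
 * illustration only, not part of the original source.
 */
#if 0	/* illustration */
	psm_amarg_t arg;
	psm_error_t rc;

	arg.u64w0 = 42;
	rc = psm_am_request_short(epaddr, handler_idx,
				  &arg, 1,	/* args, nargs */
				  NULL, 0,	/* src, len */
				  0,		/* flags */
				  NULL, NULL);	/* no completion callback */
	if (rc != PSM_OK) {
		/* handle error */
	}
#endif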
int __recvpath
psmi_mq_handle_rts(psm_mq_t mq, uint64_t tag, uintptr_t send_buf,
		   uint32_t send_msglen, psm_epaddr_t peer,
		   mq_rts_callback_fn_t cb, psm_mq_req_t *req_o)
{
	psm_mq_req_t req;
	uint32_t msglen;
	int rc;

	PSMI_PLOCK_ASSERT();

	req = mq_req_match(&(mq->expected_q), tag, 1);

	if (req) {	/* we have a match, no need to callback */
		msglen = mq_set_msglen(req, req->buf_len, send_msglen);
		req->type = MQE_TYPE_RECV;
		req->state = MQ_STATE_MATCHED;
		req->tag = tag;
		req->recv_msgoff = 0;
		req->rts_peer = peer;
		req->rts_sbuf = send_buf;
		*req_o = req;	/* yes match */
		rc = MQ_RET_MATCH_OK;
	} else {	/* No match, keep track of callback */
		req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
		psmi_assert(req != NULL);
		req->type = MQE_TYPE_RECV;
		/* We don't know recv_msglen yet but we set it here for
		 * mq_iprobe */
		req->send_msglen = req->recv_msglen = send_msglen;
		req->state = MQ_STATE_UNEXP_RV;
		req->tag = tag;
		req->rts_callback = cb;
		req->recv_msgoff = 0;
		req->rts_peer = peer;
		req->rts_sbuf = send_buf;
		mq_sq_append(&mq->unexpected_q, req);
		*req_o = req;	/* no match, will callback */
		rc = MQ_RET_UNEXP_OK;
	}

	_IPATH_VDBG("from=%s match=%s (req=%p) mqtag=%" PRIx64 " recvlen=%d "
		    "sendlen=%d errcode=%d\n",
		    psmi_epaddr_get_name(peer->epid),
		    rc == MQ_RET_MATCH_OK ? "YES" : "NO", req, req->tag,
		    req->recv_msglen, req->send_msglen, req->error_code);

	return rc;
}
void
psmi_am_mq_handler_data(void *toki, psm2_amarg_t *args, int narg, void *buf,
			size_t len)
{
	amsh_am_token_t *tok = (amsh_am_token_t *) toki;

	psmi_assert(toki != NULL);

	psm2_epaddr_t epaddr = (psm2_epaddr_t) tok->tok.epaddr_incoming;
	psm2_mq_req_t req = mq_eager_match(tok->mq, epaddr, 0); /* using seqnum 0 */

	psmi_assert_always(req != NULL);
	psmi_mq_handle_data(tok->mq, req, args[2].u32w0, buf, len);

	return;
}
void
psmi_am_handler(void *toki, psm2_amarg_t *args, int narg, void *buf, size_t len)
{
	amsh_am_token_t *tok = (amsh_am_token_t *) toki;
	psm2_am_handler_fn_t hfn;

	psmi_assert(toki != NULL);

	hfn = psm_am_get_handler_function(tok->mq->ep,
					  (psm2_handler_t) args[0].u32w0);

	/* Invoke handler function. For AM we do not support break
	 * functionality */
	hfn(toki, args + 1, narg - 1, buf, len);

	return;
}
static psm2_error_t
ptl_handle_rtsmatch_request(psm2_mq_req_t req, int was_posted,
			    amsh_am_token_t *tok)
{
	psm2_amarg_t args[5];
	psm2_epaddr_t epaddr = req->rts_peer;
	ptl_t *ptl = epaddr->ptlctl->ptl;
	int pid = 0;

	PSM2_LOG_MSG("entering.");
	psmi_assert((tok != NULL && was_posted)
		    || (tok == NULL && !was_posted));

	_HFI_VDBG("[shm][rndv][recv] req=%p dest=%p len=%d tok=%p\n",
		  req, req->buf, req->recv_msglen, tok);

	if ((ptl->psmi_kassist_mode & PSMI_KASSIST_GET)
	    && req->recv_msglen > 0
	    && (pid = psmi_epaddr_pid(epaddr))) {
		/* cma can be done in handler context or not. */
		size_t nbytes = cma_get(pid, (void *)req->rts_sbuf,
					req->buf, req->recv_msglen);
		psmi_assert_always(nbytes == req->recv_msglen);
	}

	args[0].u64w0 = (uint64_t) (uintptr_t) req->ptl_req_ptr;
	args[1].u64w0 = (uint64_t) (uintptr_t) req;
	args[2].u64w0 = (uint64_t) (uintptr_t) req->buf;
	args[3].u32w0 = req->recv_msglen;
	args[3].u32w1 = tok != NULL ? 1 : 0;
	args[4].u64w0 = 0;

	if (tok != NULL) {
		psmi_am_reqq_add(AMREQUEST_SHORT, tok->ptl,
				 tok->tok.epaddr_incoming,
				 mq_handler_rtsmatch_hidx,
				 args, 5, NULL, 0, NULL, 0);
	} else
		psmi_amsh_short_request(ptl, epaddr, mq_handler_rtsmatch_hidx,
					args, 5, NULL, 0, 0);

	/* 0-byte completion or we used kassist */
	if (pid || req->recv_msglen == 0)
		psmi_mq_handle_rts_complete(req);

	PSM2_LOG_MSG("leaving.");
	return PSM2_OK;
}
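/*
 * cma_get() above is defined elsewhere in this PTL.  As an illustration
 * only (hypothetical name, minimal error handling), a comparable helper
 * could be built on Linux process_vm_readv(2) along these lines:
 */
#if 0	/* illustration */
#include <sys/uio.h>

static ssize_t cma_get_sketch(pid_t pid, const void *remote_src,
			      void *local_dst, size_t len)
{
	struct iovec local = { .iov_base = local_dst, .iov_len = len };
	struct iovec remote = { .iov_base = (void *)remote_src,
				.iov_len = len };

	/* copy len bytes from the remote process' address space */
	return process_vm_readv(pid, &local, 1, &remote, 1, 0);
}
#endif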
static inline void
ips_am_scb_init(ips_scb_t *scb, uint8_t handler, int nargs,
		int pad_bytes,
		psm2_am_completion_fn_t completion_fn,
		void *completion_ctxt)
{
	psmi_assert(pad_bytes < (1 << IPS_AM_HDR_LEN_BITS));

	scb->completion_am = completion_fn;
	scb->cb_param = completion_ctxt;
	scb->ips_lrh.amhdr_hidx = handler;
	scb->ips_lrh.amhdr_len = pad_bytes;
	scb->ips_lrh.amhdr_nargs = nargs;
	scb->ips_lrh.flags = 0;
	if (completion_fn)
		scb->flags |= IPS_SEND_FLAG_ACKREQ;
	return;
}
psm2_error_t
ips_am_short_reply(psm2_am_token_t tok, psm2_handler_t handler,
		   psm2_amarg_t *args, int nargs,
		   void *src, size_t len, int flags,
		   psm2_am_completion_fn_t completion_fn,
		   void *completion_ctxt)
{
	struct ips_am_token *token = (struct ips_am_token *)tok;
	struct ips_proto_am *proto_am = token->proto_am;
	struct ips_epaddr *ipsaddr = token->epaddr_rail;
	int pad_bytes = calculate_pad_bytes(len);
	int scb_flags = 0;
	ips_scb_t *scb;

	if (!token->tok.can_reply) {
		_HFI_ERROR("Invalid AM reply for request!");
		return PSM2_AM_INVALID_REPLY;
	}

	psmi_assert(ips_scbctrl_avail(&proto_am->scbc_reply));

	if ((nargs << 3) + len <= (IPS_AM_HDR_NARGS << 3)) {
		scb = ips_scbctrl_alloc_tiny(&proto_am->scbc_reply);
	} else {
		int payload_sz = (nargs << 3);

		payload_sz += (flags & PSM2_AM_FLAG_ASYNC) ?
			      0 : (len + pad_bytes);
		scb_flags |= (payload_sz > (IPS_AM_HDR_NARGS << 3)) ?
		    IPS_SCB_FLAG_ADD_BUFFER : 0;

		scb = ips_scbctrl_alloc(&proto_am->scbc_reply, 1, payload_sz,
					scb_flags);
	}

	psmi_assert_always(scb != NULL);
	ips_am_scb_init(scb, handler, nargs, pad_bytes,
			completion_fn, completion_ctxt);
	am_short_reqrep(scb, ipsaddr, args, nargs, OPCODE_AM_REPLY, src,
			len, flags, pad_bytes);
	return PSM2_OK;
}
void
psmi_am_mq_handler_rtsmatch(void *toki, psm2_amarg_t *args, int narg, void *buf,
			    size_t len)
{
	amsh_am_token_t *tok = (amsh_am_token_t *) toki;

	psmi_assert(toki != NULL);

	ptl_t *ptl = tok->ptl;
	psm2_mq_req_t sreq = (psm2_mq_req_t) (uintptr_t) args[0].u64w0;
	void *dest = (void *)(uintptr_t) args[2].u64w0;
	uint32_t msglen = args[3].u32w0;
	psm2_amarg_t rarg[1];

	_HFI_VDBG("[rndv][send] req=%p dest_req=%p src=%p dest=%p len=%d\n",
		  sreq, (void *)(uintptr_t) args[1].u64w0, sreq->buf, dest,
		  msglen);

	if (msglen > 0) {
		rarg[0].u64w0 = args[1].u64w0;	/* rreq */
		int kassist_mode = ptl->psmi_kassist_mode;

		if (kassist_mode & PSMI_KASSIST_PUT) {
			int pid = psmi_epaddr_pid(tok->tok.epaddr_incoming);
			size_t nbytes = cma_put(sreq->buf, pid, dest, msglen);
			psmi_assert_always(nbytes == msglen);

			/* Send response that PUT is complete */
			psmi_amsh_short_reply(tok, mq_handler_rtsdone_hidx,
					      rarg, 1, NULL, 0, 0);
		} else if (!(kassist_mode & PSMI_KASSIST_MASK)) {
			/* Only transfer if kassist is off, i.e. neither GET
			 * nor PUT. */
			psmi_amsh_long_reply(tok, mq_handler_rtsdone_hidx,
					     rarg, 1, sreq->buf, msglen,
					     dest, 0);
		}
	}
	psmi_mq_handle_rts_complete(sreq);
}
psm2_error_t
ips_tid_release(struct ips_tid *tidc,
		uint32_t *tid_array, uint32_t tidcnt)
{
	struct ips_tid_ctrl *ctrl = tidc->tid_ctrl;
	psm2_error_t err = PSM2_OK;

	psmi_assert(tidcnt > 0);

	if (tidc->context->tid_ctrl)
		pthread_spin_lock(&ctrl->tid_ctrl_lock);

	if (hfi_free_tid(tidc->context->ctrl,
			 (uint64_t) (uintptr_t) tid_array, tidcnt) < 0) {
		if (tidc->context->tid_ctrl)
			pthread_spin_unlock(&ctrl->tid_ctrl_lock);

		/* If failed to unpin pages, it's fatal error */
		err = psmi_handle_error(tidc->context->ep,
					PSM2_EP_DEVICE_FAILURE,
					"Failed to tid free %d tids", tidcnt);
		goto fail;
	}

	ctrl->tid_num_avail += tidcnt;

	if (tidc->context->tid_ctrl)
		pthread_spin_unlock(&ctrl->tid_ctrl_lock);

	tidc->tid_num_inuse -= tidcnt;
	/* If an available callback is registered invoke it */
	if (((tidc->tid_num_inuse + tidcnt) == ctrl->tid_num_max)
	    && tidc->tid_avail_cb)
		tidc->tid_avail_cb(tidc, tidc->tid_avail_context);

fail:
	return err;
}
psm2_error_t
ips_subcontext_ureg_get(ptl_t *ptl, uint32_t subcontext_cnt,
			psmi_context_t *context,
			struct ips_subcontext_ureg **uregp)
{
	const struct hfi1_base_info *base_info = &context->ctrl->base_info;
	uintptr_t all_subcontext_uregbase =
	    (uintptr_t) base_info->subctxt_uregbase;
	int i;

	psmi_assert_always(all_subcontext_uregbase != 0);
	for (i = 0; i < HFI1_MAX_SHARED_CTXTS; i++) {
		struct ips_subcontext_ureg *subcontext_ureg =
		    (struct ips_subcontext_ureg *)all_subcontext_uregbase;
		*uregp++ = (i < subcontext_cnt) ? subcontext_ureg : NULL;
		all_subcontext_uregbase += sizeof(struct ips_subcontext_ureg);
	}

	ptl->recvshc->hwcontext_ctrl =
	    (struct ips_hwcontext_ctrl *)all_subcontext_uregbase;
	all_subcontext_uregbase += sizeof(struct ips_hwcontext_ctrl);

	context->spio_ctrl = (void *)all_subcontext_uregbase;
	all_subcontext_uregbase += sizeof(struct ips_spio_ctrl);

	context->tid_ctrl = (void *)all_subcontext_uregbase;
	all_subcontext_uregbase += sizeof(struct ips_tid_ctrl);

	context->tf_ctrl = (void *)all_subcontext_uregbase;
	all_subcontext_uregbase += sizeof(struct ips_tf_ctrl);

	psmi_assert((all_subcontext_uregbase -
		     (uintptr_t) base_info->subctxt_uregbase) <=
		    PSMI_PAGESIZE);
	return PSM2_OK;
}
/*
 * Get mmu notifier invalidation info and update PSM's caching.
 */
psm2_error_t
ips_tidcache_invalidation(struct ips_tid *tidc)
{
	cl_qmap_t *p_map = &tidc->tid_cachemap;
	uint32_t i, j, idx, tidcnt;
	psm2_error_t err;

	/*
	 * get a list of invalidated tids from driver,
	 * driver will clear the event bit before return.
	 */
	tidcnt = 0;
	if (hfi_get_invalidation(tidc->context->ctrl,
				 (uint64_t) (uintptr_t) tidc->tid_array,
				 &tidcnt) < 0) {
		/* If failed to get invalidation info, it's fatal error */
		err = psmi_handle_error(tidc->context->ep,
					PSM2_EP_DEVICE_FAILURE,
					"Failed to get invalidation info");
		return err;
	}
	psmi_assert(tidcnt > 0 && tidcnt <= tidc->tid_ctrl->tid_num_max);

	j = 0;
	for (i = 0; i < tidcnt; i++) {
		/*
		 * Driver only returns tidctrl=1 or tidctrl=2.
		 */
		idx = 2*IPS_TIDINFO_GET_TID(tidc->tid_array[i]) +
			IPS_TIDINFO_GET_TIDCTRL(tidc->tid_array[i]);
		psmi_assert(idx != 0);
		psmi_assert(idx <= tidc->tid_ctrl->tid_num_max);

		/*
		 * sanity check.
		 */
		psmi_assert(p_map->root[idx].payload.tidinfo ==
			    tidc->tid_array[i]);
		psmi_assert(LENGTH(idx) ==
			    IPS_TIDINFO_GET_LENGTH(tidc->tid_array[i]));

		/*
		 * if the tid is already invalidated, ignore it,
		 * but do sanity check.
		 */
		if (INVALIDATE(idx) != 0) {
			psmi_assert(REFCNT(idx) == 0);
			continue;
		}

		/*
		 * mark the tid invalidated.
		 */
		INVALIDATE(idx) = 1;

		/*
		 * if the tid is idle, remove the tid from RB tree
		 * and idle queue, put on free list.
		 */
		if (REFCNT(idx) == 0) {
			IDLE_REMOVE(idx);
			ips_cl_qmap_remove_item(p_map, &p_map->root[idx]);

			if (i != j)
				tidc->tid_array[j] = tidc->tid_array[i];
			j++;
		}
	}

	if (j > 0) {
		/*
		 * call driver to free the tids.
		 */
		if (hfi_free_tid(tidc->context->ctrl,
				 (uint64_t) (uintptr_t) tidc->tid_array,
				 j) < 0) {
			/* If failed to unpin pages, it's fatal error */
			err = psmi_handle_error(tidc->context->ep,
						PSM2_EP_DEVICE_FAILURE,
						"Failed to tid free %d tids",
						j);
			return err;
		}
	}

	return PSM2_OK;
}
/*
 * Note, epaddr is the master.
 */
int __recvpath
psmi_mq_handle_outoforder_queue(psm_epaddr_t epaddr)
{
	psm_mq_t mq = epaddr->ep->mq;
	psm_mq_req_t ureq, ereq;
	uint32_t msglen;

next_ooo:
	ureq = mq_ooo_match(&epaddr->outoforder_q, epaddr->mctxt_recv_seqnum);
	if (ureq == NULL)
		return 0;
	epaddr->mctxt_recv_seqnum++;
	epaddr->outoforder_c--;

	ereq = mq_req_match(&(mq->expected_q), ureq->tag, 1);
	if (ereq == NULL) {
		mq_sq_append(&mq->unexpected_q, ureq);
		if (epaddr->outoforder_c)
			goto next_ooo;
		return 0;
	}

	psmi_assert(MQE_TYPE_IS_RECV(ereq->type));
	ereq->tag = ureq->tag;
	msglen = mq_set_msglen(ereq, ereq->buf_len, ureq->send_msglen);

	switch (ureq->state) {
	case MQ_STATE_COMPLETE:
		if (ureq->buf != NULL) {	/* 0-byte don't alloc a sysbuf */
			psmi_mq_mtucpy(ereq->buf, (const void *)ureq->buf,
				       msglen);
			psmi_mq_sysbuf_free(mq, ureq->buf);
		}
		ereq->state = MQ_STATE_COMPLETE;
		mq_qq_append(&mq->completed_q, ereq);
		break;
	case MQ_STATE_UNEXP:	/* not done yet */
		ereq->type = ureq->type;
		ereq->egrid = ureq->egrid;
		ereq->epaddr = ureq->epaddr;
		ereq->send_msgoff = ureq->send_msgoff;
		ereq->recv_msgoff = min(ureq->recv_msgoff, msglen);
		psmi_mq_mtucpy(ereq->buf, (const void *)ureq->buf,
			       ereq->recv_msgoff);
		psmi_mq_sysbuf_free(mq, ureq->buf);
		ereq->state = MQ_STATE_MATCHED;
		STAILQ_INSERT_AFTER(&ureq->epaddr->mctxt_master->egrlong,
				    ureq, ereq, nextq);
		STAILQ_REMOVE(&ureq->epaddr->mctxt_master->egrlong,
			      ureq, psm_mq_req, nextq);
		break;
	case MQ_STATE_UNEXP_RV:	/* rendez-vous ... */
		ereq->state = MQ_STATE_MATCHED;
		ereq->rts_peer = ureq->rts_peer;
		ereq->rts_sbuf = ureq->rts_sbuf;
		ereq->send_msgoff = 0;
		ereq->rts_callback = ureq->rts_callback;
		ereq->rts_reqidx_peer = ureq->rts_reqidx_peer;
		ereq->type = ureq->type;
		ereq->rts_callback(ereq, 0);
		break;
	default:
		fprintf(stderr, "Unexpected state %d in req %p\n",
			ureq->state, ureq);
		fprintf(stderr, "type=%d, mq=%p, tag=%p\n",
			ureq->type, ureq->mq, (void *)(uintptr_t)ureq->tag);
		abort();
	}

	psmi_mq_req_free(ureq);

	if (epaddr->outoforder_c)
		goto next_ooo;
	return 0;
}
int __recvpath
psmi_mq_handle_envelope_outoforder(psm_mq_t mq, uint16_t mode,
				   psm_epaddr_t epaddr, uint16_t msg_seqnum,
				   uint64_t tag, psmi_egrid_t egrid,
				   uint32_t send_msglen,
				   const void *payload, uint32_t paylen)
{
	psm_mq_req_t req;
	uint32_t msglen;

	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
	psmi_assert(req != NULL);

	req->tag = tag;
	req->recv_msgoff = 0;
	req->recv_msglen = req->send_msglen = req->buf_len = msglen =
		send_msglen;

	_IPATH_VDBG("from=%s match=NO (req=%p) mode=%x mqtag=%" PRIx64
		    " send_msglen=%d\n",
		    psmi_epaddr_get_name(epaddr->epid), req, mode, tag,
		    send_msglen);

	switch (mode) {
	case MQ_MSG_TINY:
		if (msglen > 0) {
			req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
			mq_copy_tiny((uint32_t *)req->buf,
				     (uint32_t *)payload, msglen);
		} else
			req->buf = NULL;
		req->state = MQ_STATE_COMPLETE;
		break;
	case MQ_MSG_SHORT:
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		psmi_mq_mtucpy(req->buf, payload, msglen);
		req->state = MQ_STATE_COMPLETE;
		break;
	case MQ_MSG_LONG:
		req->egrid = egrid;
		req->epaddr = epaddr;
		req->send_msgoff = 0;
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		req->state = MQ_STATE_UNEXP;
		req->type |= MQE_TYPE_EGRLONG;
		STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong,
				   req, nextq);
		_IPATH_VDBG("unexp MSG_LONG %d of length %d bytes pay=%d\n",
			    egrid.egr_msgno, msglen, paylen);
		if (paylen > 0)
			psmi_mq_handle_data(req, epaddr, egrid.egr_data, 0,
					    payload, paylen);
		psmi_mq_handle_egrdata(mq, req, epaddr);
		break;
	default:
		psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
				  "Internal error, unknown packet 0x%x", mode);
	}

	req->msg_seqnum = msg_seqnum;
	mq_sq_append(&epaddr->mctxt_master->outoforder_q, req);
	epaddr->mctxt_master->outoforder_c++;

	mq->stats.rx_sys_bytes += msglen;
	mq->stats.rx_sys_num++;

	return MQ_RET_UNEXP_OK;
}
/*
 * This handles the regular (i.e. non-rendezvous) MPI envelopes.
 */
int __recvpath
psmi_mq_handle_envelope(psm_mq_t mq, uint16_t mode, psm_epaddr_t epaddr,
			uint64_t tag, psmi_egrid_t egrid, uint32_t send_msglen,
			const void *payload, uint32_t paylen)
{
	psm_mq_req_t req;
	uint32_t msglen;
	int rc;

	psmi_assert(epaddr != NULL);

	req = mq_req_match(&(mq->expected_q), tag, 1);

	if (req) {	/* we have a match */
		psmi_assert(MQE_TYPE_IS_RECV(req->type));
		req->tag = tag;
		msglen = mq_set_msglen(req, req->buf_len, send_msglen);

		_IPATH_VDBG("from=%s match=YES (req=%p) mode=%x mqtag=%"
			    PRIx64 " msglen=%d paylen=%d\n",
			    psmi_epaddr_get_name(epaddr->epid), req, mode,
			    tag, msglen, paylen);

		switch (mode) {
		case MQ_MSG_TINY:
			PSM_VALGRIND_DEFINE_MQ_RECV(req->buf, req->buf_len,
						    msglen);
			mq_copy_tiny((uint32_t *)req->buf,
				     (uint32_t *)payload, msglen);
			req->state = MQ_STATE_COMPLETE;
			mq_qq_append(&mq->completed_q, req);
			break;
		case MQ_MSG_SHORT:	/* message fits in 1 payload */
			PSM_VALGRIND_DEFINE_MQ_RECV(req->buf, req->buf_len,
						    msglen);
			psmi_mq_mtucpy(req->buf, payload, msglen);
			req->state = MQ_STATE_COMPLETE;
			mq_qq_append(&mq->completed_q, req);
			break;
		case MQ_MSG_LONG:
			req->egrid = egrid;
			req->state = MQ_STATE_MATCHED;
			req->type |= MQE_TYPE_EGRLONG;
			req->send_msgoff = req->recv_msgoff = 0;
			STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong,
					   req, nextq);
			_IPATH_VDBG("exp MSG_LONG %d of length %d bytes pay=%d\n",
				    egrid.egr_msgno, msglen, paylen);
			if (paylen > 0)
				psmi_mq_handle_data(req, epaddr,
						    egrid.egr_data, 0,
						    payload, paylen);
			psmi_mq_handle_egrdata(mq, req, epaddr);
			break;
		default:
			psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
					  "Internal error, unknown packet 0x%x",
					  mode);
		}

		mq->stats.rx_user_bytes += msglen;
		mq->stats.rx_user_num++;

		rc = MQ_RET_MATCH_OK;
		if (mode == MQ_MSG_LONG)
			return rc;
	} else
		rc = psmi_mq_handle_envelope_unexpected(mq, mode, epaddr, tag,
							egrid, send_msglen,
							payload, paylen);

	return rc;
}
int __recvpath
psmi_mq_handle_envelope_unexpected(psm_mq_t mq, uint16_t mode,
				   psm_epaddr_t epaddr, uint64_t tag,
				   psmi_egrid_t egrid, uint32_t send_msglen,
				   const void *payload, uint32_t paylen)
{
	psm_mq_req_t req;
	uint32_t msglen;

	/*
	 * Keep a callback here in case we want to fit some other high-level
	 * protocols over MQ (i.e. shmem). These protocols would bypass the
	 * normal message handling and go to higher-level message handlers.
	 */
	if (mode >= MQ_MSG_USER_FIRST && mq->unexpected_callback) {
		mq->unexpected_callback(mq, mode, epaddr, tag, send_msglen,
					payload, paylen);
		return MQ_RET_UNEXP_OK;
	}

	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
	psmi_assert(req != NULL);

	req->tag = tag;
	req->recv_msgoff = 0;
	req->recv_msglen = req->send_msglen = req->buf_len = msglen =
		send_msglen;

	_IPATH_VDBG("from=%s match=NO (req=%p) mode=%x mqtag=%" PRIx64
		    " send_msglen=%d\n",
		    psmi_epaddr_get_name(epaddr->epid), req, mode, tag,
		    send_msglen);
#if 0
	if (mq->cur_sysbuf_bytes + msglen > mq->max_sysbuf_bytes) {
		_IPATH_VDBG("req=%p with len=%d exceeds limit of %llu sysbuf_bytes\n",
			    req, msglen,
			    (unsigned long long)mq->max_sysbuf_bytes);
		return MQ_RET_UNEXP_NO_RESOURCES;
	}
#endif

	switch (mode) {
	case MQ_MSG_TINY:
		if (msglen > 0) {
			req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
			mq_copy_tiny((uint32_t *)req->buf,
				     (uint32_t *)payload, msglen);
		} else
			req->buf = NULL;
		req->state = MQ_STATE_COMPLETE;
		break;
	case MQ_MSG_SHORT:
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		psmi_mq_mtucpy(req->buf, payload, msglen);
		req->state = MQ_STATE_COMPLETE;
		break;
	case MQ_MSG_LONG:
		req->egrid = egrid;
		req->send_msgoff = 0;
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		req->state = MQ_STATE_UNEXP;
		req->type |= MQE_TYPE_EGRLONG;
		STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong,
				   req, nextq);
		_IPATH_VDBG("unexp MSG_LONG %d of length %d bytes pay=%d\n",
			    egrid.egr_msgno, msglen, paylen);
		if (paylen > 0)
			psmi_mq_handle_data(req, epaddr, egrid.egr_data, 0,
					    payload, paylen);
		psmi_mq_handle_egrdata(mq, req, epaddr);
		break;
	default:
		psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
				  "Internal error, unknown packet 0x%x", mode);
	}

	mq_sq_append(&mq->unexpected_q, req);

	mq->stats.rx_sys_bytes += msglen;
	mq->stats.rx_sys_num++;

	return MQ_RET_UNEXP_OK;
}
/*
 * Register a new buffer with the driver, and cache the tidinfo.
 */
static psm2_error_t
ips_tidcache_register(struct ips_tid *tidc,
		      unsigned long start, uint32_t length, uint32_t *firstidx)
{
	cl_qmap_t *p_map = &tidc->tid_cachemap;
	uint32_t tidoff, tidlen;
	uint32_t idx, tidcnt;
	psm2_error_t err;

	/*
	 * make sure we have at least one free tid to
	 * register the new buffer.
	 */
	if (NTID == tidc->tid_cachesize) {
		/* all tids are in active use, error? */
		if (NIDLE == 0)
			return PSM2_OK_NO_PROGRESS;

		/*
		 * free the first tid in idle queue.
		 */
		idx = IPREV(IHEAD);
		tidc->tid_array[0] = p_map->root[idx].payload.tidinfo;
		err = ips_tidcache_remove(tidc, 1);
		if (err)
			return err;
	}
	psmi_assert(NTID < tidc->tid_cachesize);

	/* Clip length if it exceeds the worst-case tid allocation,
	   where each entry in the tid array can accommodate only
	   one page. */
	if (length > 4096*tidc->tid_ctrl->tid_num_max) {
		length = 4096*tidc->tid_ctrl->tid_num_max;
	}

	/*
	 * register the new buffer.
	 */
retry:
	tidcnt = 0;
	if (hfi_update_tid(tidc->context->ctrl,
			   (uint64_t) start, &length,
			   (uint64_t) tidc->tid_array, &tidcnt) < 0) {
		/* if driver reaches lockable memory limit */
		if (errno == ENOMEM && NIDLE) {
			uint64_t lengthEvicted =
				ips_tidcache_evict(tidc, length);

			if (lengthEvicted >= length)
				goto retry;
		}

		/* Unable to pin pages? retry later */
		return PSM2_EP_DEVICE_FAILURE;
	}
	psmi_assert_always(tidcnt > 0);
	psmi_assert((tidcnt+NTID) <= tidc->tid_cachesize);

	/*
	 * backward processing because we want to return
	 * the first RB index in the array.
	 */
	idx = 0;
	tidoff = length;
	while (tidcnt) {
		/*
		 * Driver only returns tidctrl=1 or tidctrl=2.
		 */
		tidcnt--;
		idx = 2*IPS_TIDINFO_GET_TID(tidc->tid_array[tidcnt]) +
			IPS_TIDINFO_GET_TIDCTRL(tidc->tid_array[tidcnt]);
		tidlen = IPS_TIDINFO_GET_LENGTH(tidc->tid_array[tidcnt]);

		/*
		 * sanity check.
		 */
		psmi_assert(idx != 0);
		psmi_assert(idx <= tidc->tid_ctrl->tid_num_max);
		psmi_assert(INVALIDATE(idx) != 0);
		psmi_assert(REFCNT(idx) == 0);

		/*
		 * clear the tid invalidated.
		 */
		INVALIDATE(idx) = 0;

		/*
		 * put the tid into a RB node.
		 */
		tidoff -= tidlen << 12;
		START(idx) = start + tidoff;
		LENGTH(idx) = tidlen;
		p_map->root[idx].payload.tidinfo = tidc->tid_array[tidcnt];

		/*
		 * put the node into RB tree and idle queue head.
		 */
		IDLE_INSERT(idx);
		ips_cl_qmap_insert_item(p_map, &p_map->root[idx]);
	}
	psmi_assert(idx != 0);
	psmi_assert(tidoff == 0);
	*firstidx = idx;

	return PSM2_OK;
}
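/*
 * Illustration, not part of the original source, of the backward walk
 * above: the per-entry page counts returned by the driver sum to
 * length/4096, so subtracting each entry's bytes while iterating from the
 * last entry to the first ends exactly at tidoff == 0, and the final
 * iteration leaves idx pointing at the first chunk of the buffer.
 * The page counts below are hypothetical.
 */
#if 0	/* illustration */
	unsigned pages[3] = { 4, 2, 1 };	/* driver-chosen split */
	unsigned off = (4 + 2 + 1) << 12;	/* == registered length */
	int i;

	for (i = 2; i >= 0; i--)
		off -= pages[i] << 12;		/* off is 0 after the loop */
#endif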
int ips_proto_am(struct ips_recvhdrq_event *rcv_ev)
{
	struct ips_message_header *p_hdr = rcv_ev->p_hdr;
	struct ips_epaddr *ipsaddr = rcv_ev->ipsaddr;
	struct ips_proto_am *proto_am = &rcv_ev->proto->proto_am;
	ips_epaddr_flow_t flowid = ips_proto_flowid(p_hdr);
	struct ips_flow *flow;
	struct ips_am_message *msg = NULL;
	int ret = IPS_RECVHDRQ_CONTINUE;
	enum ips_msg_order msgorder;

	psmi_assert(flowid < EP_FLOW_LAST);
	flow = &ipsaddr->flows[flowid];

	/*
	 * Based on the AM request/reply traffic pattern, if we don't have a
	 * reply scb slot then we can't process the request packet; we just
	 * silently drop it.  Otherwise, it would be a deadlock.  Note:
	 * ips_proto_is_expected_or_nak() can not be called in this case.
	 */
	if (_get_proto_hfi_opcode(p_hdr) == OPCODE_AM_REQUEST &&
	    !ips_scbctrl_avail(&proto_am->scbc_reply))
		return IPS_RECVHDRQ_CONTINUE;

	if (!ips_proto_is_expected_or_nak(rcv_ev))
		return IPS_RECVHDRQ_CONTINUE;

	uint16_t send_msgseq =
	    __le32_to_cpu(p_hdr->khdr.kdeth0) & HFI_KHDR_MSGSEQ_MASK;
	msgorder = ips_proto_check_msg_order(ipsaddr, flow, send_msgseq,
					     &ipsaddr->msgctl->am_recv_seqnum);

	if (msgorder == IPS_MSG_ORDER_FUTURE)
		return IPS_RECVHDRQ_REVISIT;
	else if (msgorder == IPS_MSG_ORDER_FUTURE_RECV) {
		uint64_t *msg_payload;
		uint64_t *payload = ips_recvhdrq_event_payload(rcv_ev);
		uint32_t paylen = ips_recvhdrq_event_paylen(rcv_ev);

		psmi_assert(paylen == 0 || payload);
		msg = psmi_mpool_get(ips_am_msg_pool);
		msg_payload = psmi_sysbuf_alloc(
				ips_recvhdrq_event_paylen(rcv_ev));

		if (unlikely(msg == NULL || msg_payload == NULL)) {
			/* Out of memory, drop the packet. */
			printf("%d OOM dropping %d\n", getpid(), send_msgseq);
			flow->recv_seq_num.psn_num =
				(flow->recv_seq_num.psn_num - 1) &
				rcv_ev->proto->psn_mask;
			return IPS_RECVHDRQ_BREAK;
		}

		memcpy(&msg->p_hdr, p_hdr, sizeof(struct ips_message_header));
		memcpy(msg_payload, payload, paylen);

		msg->payload = msg_payload;
		msg->ipsaddr = ipsaddr;
		msg->proto_am = proto_am;
		msg->paylen = paylen;
		msg->seqnum =
		    __le32_to_cpu(p_hdr->khdr.kdeth0) & HFI_KHDR_MSGSEQ_MASK;

		ips_proto_am_queue_msg(msg);
	} else if ((msgorder == IPS_MSG_ORDER_EXPECTED) ||
		   (msgorder == IPS_MSG_ORDER_EXPECTED_MATCH)) {
		uint64_t *payload = ips_recvhdrq_event_payload(rcv_ev);
		uint32_t paylen = ips_recvhdrq_event_paylen(rcv_ev);

		psmi_assert(paylen == 0 || payload);
		if (ips_am_run_handler(p_hdr, ipsaddr, proto_am,
				       payload, paylen))
			ret = IPS_RECVHDRQ_BREAK;

		ips_proto_am_handle_outoforder_queue();
	}

	/* Check whether the handler replied; if it didn't, ack the request */
	if ((__be32_to_cpu(p_hdr->bth[2]) & IPS_SEND_FLAG_ACKREQ) ||
	    (flow->flags & IPS_FLOW_FLAG_GEN_BECN))
		ips_proto_send_ack((struct ips_recvhdrq *)rcv_ev->recvq, flow);

	ips_proto_process_ack(rcv_ev);
	return ret;
}
psm2_error_t
ips_tidcache_acquire(struct ips_tid *tidc,
		     const void *buf, uint32_t *length,
		     uint32_t *tid_array, uint32_t *tidcnt, uint32_t *tidoff)
{
	cl_qmap_t *p_map = &tidc->tid_cachemap;
	cl_map_item_t *p_item;
	unsigned long start = (unsigned long)buf;
	unsigned long end = start + (*length);
	uint32_t idx, nbytes;
	psm2_error_t err;

	/*
	 * Before every tid cache search, we need to update the
	 * tid cache if there is an invalidation event; otherwise
	 * the cached address may be invalidated and we might get
	 * a wrong match.
	 */
	if ((*tidc->invalidation_event) & HFI1_EVENT_TID_MMU_NOTIFY) {
		err = ips_tidcache_invalidation(tidc);
		if (err)
			return err;
	}

	/*
	 * Now we can do the matching from the cache, because obsolete
	 * addresses in the cache have been removed or identified.
	 */
retry:
	p_item = ips_cl_qmap_search(p_map, start, end);
	idx = 2*IPS_TIDINFO_GET_TID(p_item->payload.tidinfo) +
		IPS_TIDINFO_GET_TIDCTRL(p_item->payload.tidinfo);

	/*
	 * There is a tid match.
	 */
	if (idx) {
		/*
		 * If there is a cache match but the tid has been
		 * invalidated, we can't match this tid, and we also
		 * can't register this address; we need to wait for this
		 * tid to be freed.
		 */
		if (INVALIDATE(idx) != 0)
			return PSM2_OK_NO_PROGRESS;

		/*
		 * If the page offset within the tid is not less than
		 * 128K and the address offset within the page is not a
		 * 64B multiple, PSM can't handle this tid with any offset
		 * mode.  We need to free this tid and re-register with
		 * the requested page address.
		 */
		if (((start - START(idx)) >= 131072) && ((*tidoff) & 63)) {
			/*
			 * If the tid is currently used, retry later.
			 */
			if (REFCNT(idx) != 0)
				return PSM2_OK_NO_PROGRESS;

			/*
			 * free this tid.
			 */
			tidc->tid_array[0] =
				p_map->root[idx].payload.tidinfo;
			err = ips_tidcache_remove(tidc, 1);
			if (err)
				return err;

			/* try to match a node again */
			goto retry;
		}
	}

	/*
	 * If there is no matching node, or 'start' falls outside the node's
	 * range, the whole or partial buffer from 'start' is not registered
	 * yet.
	 */
	if (!idx || START(idx) > start) {
		if (!idx)
			nbytes = end - start;
		else
			nbytes = START(idx) - start;

		/*
		 * Because we don't have any matching tid yet, if
		 * there is an error, we return from here; PSM
		 * will try again later.
		 */
		err = ips_tidcache_register(tidc, start, nbytes, &idx);
		if (err)
			return err;
	}

	/*
	 * sanity check.
	 */
	psmi_assert(START(idx) <= start);
	psmi_assert(INVALIDATE(idx) == 0);

	*tidoff += start - START(idx);
	*tidcnt = 1;

	tid_array[0] = p_map->root[idx].payload.tidinfo;
	REFCNT(idx)++;
	if (REFCNT(idx) == 1)
		IDLE_REMOVE(idx);
	start = END(idx);

	while (start < end) {
		p_item = ips_cl_qmap_successor(p_map, &p_map->root[idx]);
		idx = 2*IPS_TIDINFO_GET_TID(p_item->payload.tidinfo) +
			IPS_TIDINFO_GET_TIDCTRL(p_item->payload.tidinfo);
		if (!idx || START(idx) != start) {
			if (!idx)
				nbytes = end - start;
			else
				nbytes = (START(idx) > end) ?
					 (end - start) : (START(idx) - start);

			/*
			 * Because we already have at least one matching tid,
			 * if registering new pages fails, we break
			 * here and return the tids we already have.
			 */
			err = ips_tidcache_register(tidc, start, nbytes, &idx);
			if (err)
				break;
		} else if (INVALIDATE(idx) != 0) {
			/*
			 * The tid has been invalidated; it is still in
			 * the cache because it is still being used, but
			 * any new usage is not allowed.  We ignore it and
			 * return the tids we already have.
			 */
			psmi_assert(REFCNT(idx) != 0);
			break;
		}

		/*
		 * sanity check.
		 */
		psmi_assert(START(idx) == start);
		psmi_assert(INVALIDATE(idx) == 0);

		tid_array[(*tidcnt)++] = p_map->root[idx].payload.tidinfo;
		REFCNT(idx)++;
		if (REFCNT(idx) == 1)
			IDLE_REMOVE(idx);
		start = END(idx);
	}

	if (start < end)
		*length = start - (unsigned long)buf;
	/* otherwise, all pages are registered */

	psmi_assert((*tidcnt) > 0);

	return PSM2_OK;
}