/*
 * Post an FMA descriptor on the endpoint's GNI EP.
 *
 * Returns @a ok_status on success (caller-chosen, e.g. a byte count for
 * bcopy paths or UCS_INPROGRESS - hence the ssize_t return type),
 * UCS_ERR_NO_RESOURCE when the ep cannot send or uGNI reports a transient
 * resource shortage, or UCS_ERR_IO_ERROR on any other uGNI failure.
 * On every failure path @a fma is returned to its memory pool; on success
 * it stays outstanding until completed by CQ progress.
 */
static inline ssize_t uct_ugni_post_fma(uct_ugni_rdma_iface_t *iface,
                                        uct_ugni_ep_t *ep,
                                        uct_ugni_base_desc_t *fma,
                                        ssize_t ok_status)
{
    gni_return_t ugni_rc;

    if (ucs_unlikely(!uct_ugni_ep_can_send(ep))) {
        ucs_mpool_put(fma);
        return UCS_ERR_NO_RESOURCE;
    }

    /* serialize access to the uGNI device around the post */
    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_PostFma(ep->ep, &fma->desc);
    uct_ugni_device_unlock(&iface->super.cdm);

    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_mpool_put(fma);
        if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) {
            /* transient shortage: debug-level only, caller may retry */
            ucs_debug("GNI_PostFma failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
            return UCS_ERR_NO_RESOURCE;
        } else {
            ucs_error("GNI_PostFma failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
            return UCS_ERR_IO_ERROR;
        }
    }

    /* track the in-flight operation for flush and progress accounting */
    ++fma->flush_group->flush_comp.count;
    ++iface->super.outstanding;
    return ok_status;
}
/*
 * Post an RDMA descriptor on the endpoint's GNI EP.
 *
 * Returns UCS_INPROGRESS on success, UCS_ERR_NO_RESOURCE when the ep
 * cannot send or uGNI reports a transient resource shortage, or
 * UCS_ERR_IO_ERROR on any other uGNI failure. On every failure path
 * @a rdma is returned to its memory pool.
 * NOTE(review): unlike uct_ugni_post_fma, this variant posts without
 * taking the device lock and counts on ep->outstanding rather than the
 * flush group - appears to belong to a different revision of the ep
 * accounting scheme; confirm against the surrounding file.
 */
static inline ucs_status_t uct_ugni_post_rdma(uct_ugni_rdma_iface_t *iface,
                                              uct_ugni_ep_t *ep,
                                              uct_ugni_base_desc_t *rdma)
{
    gni_return_t ugni_rc;

    if (ucs_unlikely(!uct_ugni_can_send(ep))) {
        ucs_mpool_put(rdma);
        return UCS_ERR_NO_RESOURCE;
    }

    ugni_rc = GNI_PostRdma(ep->ep, &rdma->desc);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_mpool_put(rdma);
        if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) {
            /* transient shortage: debug-level only, caller may retry */
            ucs_debug("GNI_PostRdma failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
            return UCS_ERR_NO_RESOURCE;
        } else {
            ucs_error("GNI_PostRdma failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
            return UCS_ERR_IO_ERROR;
        }
    }

    /* track the in-flight operation on both the ep and the iface */
    ++ep->outstanding;
    ++iface->super.outstanding;
    return UCS_INPROGRESS;
}
/*
 * Deferred free callback for one half of a composed (FMA + RDMA) fetch.
 *
 * The two descriptors of a split transfer reference each other, so neither
 * may be released until both completions have fired. rdma->tmp.count holds
 * the number of halves still outstanding; the last one to arrive restores
 * the default free routine and releases both descriptors.
 * NOTE(review): assumes @a arg is a uct_ugni_rdma_fetch_desc_t whose
 * super.comp_cb points into the peer FMA descriptor's 'tmp' completion -
 * confirm against the descriptor setup code.
 */
static void free_composed_desc(void *arg)
{
    uct_ugni_rdma_fetch_desc_t *desc = (uct_ugni_rdma_fetch_desc_t*)arg;
    uct_ugni_rdma_fetch_desc_t *fma = ucs_container_of(desc->super.comp_cb,
                                                       uct_ugni_rdma_fetch_desc_t,
                                                       tmp);
    uct_ugni_rdma_fetch_desc_t *rdma = fma->head;

    if (0 == --rdma->tmp.count) {
        /* both halves done: make the puts below actually release memory */
        fma->super.free_cb = rdma->super.free_cb = ucs_mpool_put;
        ucs_mpool_put(fma);
        ucs_mpool_put(rdma);
    }
}
/*
 * Drain one event from the iface's local (send-side) CQ.
 *
 * Looks up the smsg descriptor matching the completed msg_id, decrements
 * the ep/iface outstanding counters, and returns the descriptor to its
 * memory pool. Returns silently when nothing is outstanding or no event
 * is ready; logs and bails out on CQ overrun (TODO: handle overruns).
 */
static void progress_local_cq(uct_ugni_smsg_iface_t *iface){
    gni_return_t ugni_rc;
    gni_cq_entry_t event_data;
    uct_ugni_smsg_desc_t message_data;
    uct_ugni_smsg_desc_t *message_pointer;

    if(0 == iface->super.outstanding){
        return;
    }

    ugni_rc = GNI_CqGetEvent(iface->super.local_cq, &event_data);

    if(GNI_RC_NOT_DONE == ugni_rc){
        return;
    }

    if((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)){
        /* TODO: handle overruns */
        ucs_error("Error posting data. CQ overrun = %d",
                  (int)GNI_CQ_OVERRUN(event_data));
        return;
    }

    message_data.msg_id = GNI_CQ_GET_MSG_ID(event_data);
    message_pointer = sglib_hashed_uct_ugni_smsg_desc_t_find_member(iface->smsg_list,
                                                                    &message_data);
    /* every local CQ event must correspond to a descriptor we posted;
     * dereferencing a failed lookup would crash, so catch it in debug
     * builds (same check as the status-returning variant of this routine) */
    ucs_assert(NULL != message_pointer);

    message_pointer->ep->outstanding--;
    iface->super.outstanding--;

    sglib_hashed_uct_ugni_smsg_desc_t_delete(iface->smsg_list, message_pointer);
    ucs_mpool_put(message_pointer);
}
/*
 * Arbiter callback used to purge a DC endpoint's pending queue.
 *
 * For user requests: invoke the user purge callback if one was given,
 * otherwise warn that the request is being cancelled. FC grant requests
 * are internal, so their memory is simply returned to the pool without
 * invoking any user callback. Always removes the element from the queue.
 * (Note: "abriter" typo is preserved - the name is part of the interface.)
 */
static ucs_arbiter_cb_result_t uct_dc_ep_abriter_purge_cb(ucs_arbiter_t *arbiter,
                                                          ucs_arbiter_elem_t *elem,
                                                          void *arg)
{
    uct_purge_cb_args_t *cb_args    = arg;
    uct_pending_purge_callback_t cb = cb_args->cb;
    uct_pending_req_t *req          = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_rc_fc_request_t *freq       = ucs_derived_of(req, uct_rc_fc_request_t);
    uct_dc_ep_t *ep                 = ucs_container_of(ucs_arbiter_elem_group(elem),
                                                       uct_dc_ep_t, arb_group);

    if (ucs_likely(req->func != uct_dc_iface_fc_grant)){
        if (cb != NULL) {
            cb(req, cb_args->arg);
        } else {
            ucs_warn("ep=%p cancelling user pending request %p", ep, req);
        }
    } else {
        /* User callback should not be called for FC messages.
         * Just return pending request memory to the pool */
        ucs_mpool_put(freq);
    }

    return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
}
/*
 * Completion callback for the FMA half of a composed unaligned GET.
 *
 * Accumulates the FMA's completed bytes into the head descriptor. When the
 * whole transfer (FMA + RDMA halves) has arrived, reassembles the unaligned
 * data, fires the user's original completion, and releases the peer RDMA
 * descriptor that was parked with not_ready_to_free. Otherwise marks this
 * half as not-ready-to-free so CQ progress keeps it alive for reassembly.
 */
static void uct_ugni_unalign_fma_composed_cb(uct_completion_t *self, ucs_status_t status)
{
    uct_ugni_rdma_fetch_desc_t *fma = (uct_ugni_rdma_fetch_desc_t *)
        ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp);
    uct_ugni_rdma_fetch_desc_t *head_fma = fma;

    /* FMA completion must arrive exactly once, before the transfer is done */
    ucs_assert(head_fma->network_completed_bytes == 0 &&
               head_fma->expected_bytes != 0);

    head_fma->network_completed_bytes += head_fma->super.desc.length;
    ucs_assert(head_fma->network_completed_bytes <= head_fma->expected_bytes);

    /* Check if messages is completed */
    if (head_fma->network_completed_bytes == head_fma->expected_bytes) {
        assemble_composed_unaligned(head_fma);
        /* Call the orignal callback and skip padding */
        uct_ugni_invoke_orig_comp(head_fma, status);
        if(head_fma->head != NULL) {
            /* release the RDMA half that completed first */
            head_fma->head->super.not_ready_to_free = 0;
            ucs_mpool_put(head_fma->head);
        }
    } else {
        /* RDMA half still in flight - keep this descriptor alive */
        head_fma->super.not_ready_to_free = 1;
    }
}
/*
 * Release a held MM receive descriptor back to its memory pool.
 * @a desc points at the user-visible payload, which the pool placed
 * immediately after the uct_mm_recv_desc_t header.
 */
void uct_mm_iface_release_desc(uct_recv_desc_t *self, void *desc)
{
    /* step back over the header to recover the pool element */
    uct_mm_recv_desc_t *mm_desc = (uct_mm_recv_desc_t*)desc - 1;

    ucs_mpool_put(mm_desc);
}
/*
 * Drain one event from the iface's local (send-side) CQ.
 *
 * Returns UCS_OK when no event was ready, UCS_INPROGRESS after consuming
 * one completion (caller should poll again), or UCS_ERR_NO_RESOURCE on a
 * CQ error/overrun. On completion, decrements the ep/iface outstanding
 * counters, re-checks the ep's pending flush state, and returns the smsg
 * descriptor to its memory pool.
 */
static ucs_status_t progress_local_cq(uct_ugni_smsg_iface_t *iface){
    gni_return_t ugni_rc;
    gni_cq_entry_t event_data;
    uct_ugni_smsg_desc_t message_data;
    uct_ugni_smsg_desc_t *message_pointer;

    ugni_rc = GNI_CqGetEvent(iface->super.local_cq, &event_data);

    if(GNI_RC_NOT_DONE == ugni_rc){
        return UCS_OK;
    }

    if((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)){
        /* TODO: handle overruns */
        ucs_error("Error posting data. CQ overrun = %d",
                  (int)GNI_CQ_OVERRUN(event_data));
        return UCS_ERR_NO_RESOURCE;
    }

    message_data.msg_id = GNI_CQ_GET_MSG_ID(event_data);
    message_pointer = sglib_hashed_uct_ugni_smsg_desc_t_find_member(iface->smsg_list,
                                                                    &message_data);
    /* every local CQ event must match a descriptor we posted */
    ucs_assert(NULL != message_pointer);

    message_pointer->ep->outstanding--;
    iface->super.outstanding--;
    uct_ugni_ep_check_flush(message_pointer->ep);

    sglib_hashed_uct_ugni_smsg_desc_t_delete(iface->smsg_list, message_pointer);
    ucs_mpool_put(message_pointer);

    return UCS_INPROGRESS;
}
/*
 * Complete a matched datagram exchange identified by @a id.
 * ("recieve" typo preserved - renaming would break callers.)
 *
 * For the wildcard id the iface-level ep/descriptor are used; otherwise the
 * endpoint is looked up and its posted descriptor taken. Returns:
 *   UCS_INPROGRESS  - payload datagram received, *ep_out set (NULL for
 *                     wildcard); caller must process the descriptor
 *   UCS_OK          - ack-only datagram; descriptor already recycled
 *   UCS_ERR_CANCELED / UCS_ERR_IO_ERROR on terminated/failed exchanges
 */
static ucs_status_t recieve_datagram(uct_ugni_udt_iface_t *iface, uint64_t id,
                                     uct_ugni_udt_ep_t **ep_out)
{
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;
    uct_ugni_udt_ep_t *ep;
    gni_ep_handle_t gni_ep;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;

    ucs_trace_func("iface=%p, id=%lx", iface, id);

    if (UCT_UGNI_UDT_ANY == id) {
        /* wildcard datagram: no specific endpoint yet */
        ep = NULL;
        gni_ep = iface->ep_any;
        desc = iface->desc_any;
    } else {
        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                            uct_ugni_udt_ep_t);
        gni_ep = ep->super.ep;
        desc = ep->posted_desc;
    }

    *ep_out = ep;

    /* serialize access to the uGNI device around the wait */
    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_EpPostDataWaitById(gni_ep, id, -1, &post_state, &rem_addr, &rem_id);
    uct_ugni_device_unlock(&iface->super.cdm);

    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, id=%lu Error status: %s %d",
                  id, gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_IO_ERROR;
    }

    if (GNI_POST_TERMINATED == post_state) {
        return UCS_ERR_CANCELED;
    }

    if (GNI_POST_COMPLETED != post_state) {
        ucs_error("GNI_EpPostDataWaitById gave unexpected response: %u", post_state);
        return UCS_ERR_IO_ERROR;
    }

    if (UCT_UGNI_UDT_ANY != id) {
        /* a directed datagram we posted earlier has now completed */
        --iface->super.outstanding;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);

    ucs_trace("Got datagram id: %lu type: %i len: %i am_id: %i",
              id, header->type, header->length, header->am_id);

    if (UCT_UGNI_UDT_PAYLOAD != header->type) {
        /* ack message, no data */
        ucs_assert_always(NULL != ep);
        ucs_mpool_put(ep->posted_desc);
        uct_ugni_check_flush(ep->desc_flush_group);
        ep->posted_desc = NULL;
        return UCS_OK;
    }

    return UCS_INPROGRESS;
}
/*
 * Event-pipe handler: drain all ready UDT datagrams.
 * ("proccess" typo preserved - the name is referenced elsewhere.)
 *
 * Loops while GNI_PostDataProbeById reports ready datagrams, completing
 * each via recieve_datagram(). Payload datagrams are handed to the AM
 * dispatch (processs_datagram); descriptors the upper layer keeps are
 * tagged with the iface release_desc, others are recycled immediately.
 * Wildcard receives are re-posted so the iface keeps listening. When the
 * probe runs dry, drains the wakeup pipe and signals the device thread
 * (under device_lock) to resume monitoring.
 */
void uct_ugni_proccess_datagram_pipe(int event_id, void *arg) {
    uct_ugni_udt_iface_t *iface = (uct_ugni_udt_iface_t *)arg;
    uct_ugni_udt_ep_t *ep;
    uct_ugni_udt_desc_t *datagram;
    ucs_status_t status;
    void *user_desc;
    gni_return_t ugni_rc;
    uint64_t id;

    ucs_trace_func("");

    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id);
    uct_ugni_device_unlock(&iface->super.cdm);

    while (GNI_RC_SUCCESS == ugni_rc) {
        status = recieve_datagram(iface, id, &ep);
        if (UCS_INPROGRESS == status) {
            if (ep != NULL){
                /* directed (reply) datagram on a known endpoint */
                ucs_trace_data("Processing reply");
                datagram = ep->posted_desc;
                status = processs_datagram(iface, datagram);
                if (UCS_OK != status) {
                    /* upper layer kept the data - arrange deferred release */
                    user_desc = uct_ugni_udt_get_user_desc(datagram, iface);
                    uct_recv_desc(user_desc) = &iface->release_desc;
                } else {
                    ucs_mpool_put(datagram);
                }
                ep->posted_desc = NULL;
                uct_ugni_check_flush(ep->desc_flush_group);
            } else {
                /* wildcard datagram - no endpoint established yet */
                ucs_trace_data("Processing wildcard");
                datagram = iface->desc_any;
                status = processs_datagram(iface, datagram);
                if (UCS_OK != status) {
                    /* grab a fresh wildcard descriptor; defer release of
                     * the old one to the upper layer */
                    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                                             iface->desc_any, iface->desc_any=NULL);
                    user_desc = uct_ugni_udt_get_user_desc(datagram, iface);
                    uct_recv_desc(user_desc) = &iface->release_desc;
                }
                status = uct_ugni_udt_ep_any_post(iface);
                if (UCS_OK != status) {
                    /* We can't continue if we can't post the first receive */
                    ucs_error("Failed to post wildcard request");
                    return;
                }
            }
        }
        uct_ugni_device_lock(&iface->super.cdm);
        ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id);
        uct_ugni_device_unlock(&iface->super.cdm);
    }

    /* consume the wakeup token and let the device thread poll again */
    ucs_async_pipe_drain(&iface->event_pipe);
    pthread_mutex_lock(&iface->device_lock);
    iface->events_ready = 0;
    pthread_mutex_unlock(&iface->device_lock);

    ucs_trace("Signaling device thread to resume monitoring");
    pthread_cond_signal(&iface->device_condition);
}
/*
 * Common path for sending an active message over SMSG.
 *
 * Assigns a fresh msg_id, sends header+payload with GNI_SmsgSendWTag under
 * the CDM lock, and on success records the descriptor in the iface hash
 * (keyed by msg_id) until the local CQ confirms delivery. Any failure -
 * ep not ready or uGNI refusing the send - returns the descriptor to its
 * pool, bumps the NO_RES counter, and reports UCS_ERR_NO_RESOURCE so the
 * caller can queue the operation as pending.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_ugni_smsg_ep_am_common_send(uct_ugni_smsg_ep_t *ep, uct_ugni_smsg_iface_t *iface,
                                uint8_t am_id, unsigned header_length, void *header,
                                unsigned payload_length, void *payload,
                                uct_ugni_smsg_desc_t *desc)
{
    gni_return_t gni_rc;

    if (ucs_unlikely(!uct_ugni_ep_can_send(&ep->super))) {
        goto exit_no_res;
    }

    desc->msg_id = iface->smsg_id++;
    desc->flush_group = ep->super.flush_group;

    uct_ugni_cdm_lock(&iface->super.cdm);
    gni_rc = GNI_SmsgSendWTag(ep->super.ep, header, header_length,
                              payload, payload_length, desc->msg_id, am_id);
    uct_ugni_cdm_unlock(&iface->super.cdm);

    if(GNI_RC_SUCCESS != gni_rc){
        /* any uGNI send failure is surfaced as a resource shortage */
        goto exit_no_res;
    }

    /* track for flush and for local-CQ completion matching */
    ++desc->flush_group->flush_comp.count;
    ++iface->super.outstanding;
    sglib_hashed_uct_ugni_smsg_desc_t_add(iface->smsg_list, desc);
    return UCS_OK;

exit_no_res:
    ucs_trace("Smsg send failed.");
    ucs_mpool_put(desc);
    UCS_STATS_UPDATE_COUNTER(ep->super.super.stats, UCT_EP_STAT_NO_RES, 1);
    return UCS_ERR_NO_RESOURCE;
}
/*
 * Return a held AM receive descriptor of the self transport to its pool.
 * @a desc points at the payload, which follows the uct_am_recv_desc_t
 * header placed by the pool.
 */
static void uct_self_iface_release_am_desc(uct_iface_t *tl_iface, void *desc)
{
    /* recover the pool element by stepping back over the header */
    uct_am_recv_desc_t *self_desc = (uct_am_recv_desc_t *) desc - 1;

    ucs_trace_func("iface=%p, desc=%p", tl_iface, self_desc);
    ucs_mpool_put(self_desc);
}
/*
 * Destroy the self-transport iface: return the cached send descriptor (if
 * one is held) to its pool, then tear the pool down (leak-check enabled).
 */
static UCS_CLASS_CLEANUP_FUNC(uct_self_iface_t)
{
    ucs_trace_func("self=%p", self);
    if (self->msg_cur_desc) {
        ucs_mpool_put(self->msg_cur_desc);
    }
    ucs_mpool_cleanup(&self->msg_desc_mp, 1);
}
/*
 * Release a UDT receive descriptor that the upper layer kept after AM
 * dispatch. Recovers the pool element sitting one uct_recv_desc_t before
 * the user-visible buffer, re-arms it, and returns it to the pool.
 */
static void uct_ugni_udt_iface_release_desc(uct_recv_desc_t *self, void *desc)
{
    uct_ugni_udt_iface_t *iface;
    uct_ugni_udt_desc_t *ugni_desc;

    iface = ucs_container_of(self, uct_ugni_udt_iface_t, release_desc);
    /* the user buffer follows a uct_recv_desc_t placed inside the datagram */
    ugni_desc = (uct_ugni_udt_desc_t *)((uct_recv_desc_t *)desc - 1);
    ucs_assert_always(ugni_desc != NULL);
    uct_ugni_udt_reset_desc(ugni_desc, iface);
    ucs_mpool_put(ugni_desc);
}
/*
 * Destroy the SMSG iface: unregister progress first so no callback runs
 * during teardown. If the iface was never activated there is nothing else
 * to release; otherwise return the cached user descriptor and clean up
 * both memory pools (leak-check enabled).
 */
static UCS_CLASS_CLEANUP_FUNC(uct_ugni_smsg_iface_t)
{
    uct_worker_progress_unregister(self->super.super.worker,
                                   uct_ugni_smsg_progress, self);

    if (!self->super.activated) {
        /* We done with release */
        return;
    }

    ucs_mpool_put(self->user_desc);
    ucs_mpool_cleanup(&self->free_desc, 1);
    ucs_mpool_cleanup(&self->free_mbox, 1);
}
/*
 * Destroy an SMSG endpoint: busy-wait flushing until all outstanding sends
 * drain, consume any remaining remote-CQ events, then deregister and free
 * the mailbox attributes.
 * NOTE(review): the flush loop spins without bounding iterations - it
 * relies on ep_flush eventually returning UCS_OK; a permanently failing
 * network would hang here.
 */
static UCS_CLASS_CLEANUP_FUNC(uct_ugni_smsg_ep_t)
{
    uct_ugni_smsg_iface_t *iface = ucs_derived_of(self->super.super.super.iface,
                                                  uct_ugni_smsg_iface_t);
    ucs_status_t status;

    do {
        status = iface->super.super.super.ops.ep_flush(&self->super.super.super,
                                                       0, NULL);
    } while(UCS_OK != status);

    progress_remote_cq(iface);

    uct_ugni_smsg_mbox_dereg(iface, self->smsg_attr);
    ucs_mpool_put(self->smsg_attr);
}
/*
 * Initialize and start a tagged send request.
 *
 * Dispatches on the datatype class to set up the request (contiguous, iov,
 * or generic); invalid classes fail with UCS_ERR_INVALID_PARAM. Then starts
 * the request: if it completed immediately, the request is returned to the
 * pool and the status is returned as a status-pointer; otherwise the user
 * callback is installed and the user-visible request handle (req + 1, i.e.
 * the memory just past the internal request struct) is returned.
 */
static inline ucs_status_ptr_t
ucp_tag_send_req(ucp_request_t *req, size_t count, ssize_t max_short,
                 size_t zcopy_thresh, size_t rndv_thresh, ucp_send_callback_t cb,
                 const ucp_proto_t *proto)
{
    ucs_status_t status;

    switch (req->send.datatype & UCP_DATATYPE_CLASS_MASK) {
    case UCP_DATATYPE_CONTIG:
        status = ucp_tag_req_start_contig(req, count, max_short, zcopy_thresh,
                                          rndv_thresh, proto);
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
        break;

    case UCP_DATATYPE_IOV:
        status = ucp_tag_req_start_iov(req, count, max_short, zcopy_thresh,
                                       rndv_thresh, proto);
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
        break;

    case UCP_DATATYPE_GENERIC:
        ucp_tag_req_start_generic(req, count, rndv_thresh, proto);
        break;

    default:
        ucs_error("Invalid data type");
        return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM);
    }

    /*
     * Start the request.
     * If it is completed immediately, release the request and return the status.
     * Otherwise, return the request.
     */
    status = ucp_request_start_send(req);
    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_trace_req("releasing send request %p, returning status %s", req,
                      ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("returning send request %p", req);
    req->send.cb = cb;
    return req + 1;
}
/*
 * Begin a non-blocking endpoint disconnect.
 *
 * Allocates a flush request from the worker pool, arms it to flush every
 * lane, and kicks off the flush. If all lanes were already flushed
 * (uct_comp.count drops to 0), the ep is released synchronously, the
 * request is returned to the pool, and the final status is returned as a
 * status-pointer. Otherwise the user-visible request handle (req + 1) is
 * returned and completion happens asynchronously.
 */
static ucs_status_ptr_t ucp_disconnect_nb_internal(ucp_ep_h ep)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_debug("disconnect ep %p", ep);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    /*
     * Flush operation can be queued on the pending queue of only one of the
     * lanes (indicated by req->send.lane) and scheduled for completion on any
     * number of lanes. req->send.uct_comp.count keeps track of how many lanes
     * are not flushed yet, and when it reaches zero, it means all lanes are
     * flushed. req->send.flush.lanes keeps track of which lanes we still have
     * to start flush on.
     * If a flush is completed from a pending/completion callback, we need to
     * schedule slow-path callback to release the endpoint later, since a UCT
     * endpoint cannot be released from pending/completion callback context.
     */
    req->flags                  = 0;
    req->status                 = UCS_OK;
    req->send.ep                = ep;
    req->send.flush.flushed_cb  = ucp_ep_disconnected;
    req->send.flush.lanes       = UCS_MASK(ucp_ep_num_lanes(ep));
    req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
    req->send.flush.cbq_elem_on = 0;
    req->send.lane              = UCP_NULL_LANE;
    req->send.uct.func          = ucp_ep_flush_progress_pending;
    req->send.uct_comp.func     = ucp_ep_flush_completion;
    req->send.uct_comp.count    = ucp_ep_num_lanes(ep);

    ucp_ep_flush_progress(req);

    if (req->send.uct_comp.count == 0) {
        /* all lanes flushed synchronously - finish here */
        status = req->status;
        ucp_ep_disconnected(req);
        ucs_trace_req("ep %p: releasing flush request %p, returning status %s",
                      ep, req, ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("ep %p: return inprogress flush request %p (%p)",
                  ep, req, req + 1);
    return req + 1;
}
/*
 * Destroy the UDT iface. Teardown order matters:
 *  1. under the async lock, cancel the wildcard datagram and remove the
 *     event-pipe handler;
 *  2. drain any already-signaled events so the device thread isn't stuck;
 *  3. stop and join the device thread, then destroy the pipe;
 *  4. release the wildcard descriptor and the descriptor pool.
 */
static UCS_CLASS_CLEANUP_FUNC(uct_ugni_udt_iface_t)
{
    void *dummy;

    uct_ugni_enter_async(&self->super);
    uct_ugni_udt_clean_wildcard(self);
    ucs_async_remove_handler(ucs_async_pipe_rfd(&self->event_pipe),1);
    if (self->events_ready) {
        /* consume events that fired before the handler was removed */
        uct_ugni_proccess_datagram_pipe(ucs_async_pipe_rfd(&self->event_pipe),self);
    }
    uct_ugni_udt_terminate_thread(self);
    pthread_join(self->event_thread, &dummy);
    ucs_async_pipe_destroy(&self->event_pipe);
    ucs_mpool_put(self->desc_any);
    ucs_mpool_cleanup(&self->free_desc, 1);
    pthread_mutex_destroy(&self->device_lock);
    uct_ugni_leave_async(&self->super);
}
/*
 * Poll the iface's local CQ for one completed post and finish it.
 *
 * On a completion: invoke the descriptor's completion callback (if any),
 * decrement the iface/ep outstanding counters, recycle the descriptor
 * unless it is parked for later reassembly (not_ready_to_free), and
 * re-check the ep's flush state. Regardless of whether an event arrived,
 * gives the pending queue a dispatch opportunity before returning.
 */
void uct_ugni_progress(void *arg)
{
    gni_cq_entry_t  event_data = 0;
    gni_post_descriptor_t *event_post_desc_ptr;
    uct_ugni_base_desc_t *desc;
    uct_ugni_iface_t * iface = (uct_ugni_iface_t *)arg;
    gni_return_t ugni_rc;

    ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data);
    if (GNI_RC_NOT_DONE == ugni_rc) {
        goto out;
    }

    if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) {
        ucs_error("GNI_CqGetEvent falied. Error status %s %d ",
                  gni_err_str[ugni_rc], ugni_rc);
        return;
    }

    ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr);
    /* TRANSACTION_ERROR is tolerated here; the descriptor is still valid */
    if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) {
        ucs_error("GNI_GetCompleted falied. Error status %s %d %d",
                  gni_err_str[ugni_rc], ugni_rc, GNI_RC_TRANSACTION_ERROR);
        return;
    }

    desc = (uct_ugni_base_desc_t *)event_post_desc_ptr;
    ucs_trace_async("Completion received on %p", desc);

    if (NULL != desc->comp_cb) {
        uct_invoke_completion(desc->comp_cb, UCS_OK);
    }
    --iface->outstanding;
    --desc->ep->outstanding;

    if (ucs_likely(0 == desc->not_ready_to_free)) {
        ucs_mpool_put(desc);
    }
    uct_ugni_ep_check_flush(desc->ep);
out:
    /* have a go a processing the pending queue */
    ucs_arbiter_dispatch(&iface->arbiter, 1, uct_ugni_ep_process_pending, NULL);
    return;
}
/*
 * Process an incoming ACK on a UD endpoint: release all send skbs whose
 * PSN is covered by @a ack_psn and advance the transmit window.
 * PSN comparisons use the wrap-aware UCT_UD_PSN_COMPARE macro.
 */
static inline void uct_ud_ep_process_ack(uct_ud_iface_t *iface, uct_ud_ep_t *ep,
                                         uct_ud_psn_t ack_psn)
{
    uct_ud_send_skb_t *skb;

    /* stale or duplicate ACK - acknowledges nothing new */
    if (ucs_unlikely(UCT_UD_PSN_COMPARE(ack_psn, <=, ep->tx.acked_psn))) {
        return;
    }

    ep->tx.acked_psn = ack_psn;

    /* Release acknowledged skb's */
    ucs_queue_for_each_extract(skb, &ep->tx.window, queue,
                               UCT_UD_PSN_COMPARE(skb->neth[0].psn, <=, ack_psn)) {
        ucs_mpool_put(skb);
    }

    /* update window */
    ep->tx.max_psn = ep->tx.acked_psn + UCT_UD_MAX_WINDOW;
}
static ucs_status_t uct_ugni_ep_get_composed_fma_rdma(uct_ep_h tl_ep, void *buffer, size_t length, uct_mem_h memh, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { uct_ugni_rdma_fetch_desc_t *fma = NULL; uct_ugni_rdma_fetch_desc_t *rdma = NULL; uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); size_t fma_length, rdma_length, aligned_fma_remote_start; uint64_t fma_remote_start, rdma_remote_start; ucs_status_t post_result; rdma_length = length - iface->config.fma_seg_size; fma_length = iface->config.fma_seg_size; UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer, fma, return UCS_ERR_NO_RESOURCE); UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get, rdma, {ucs_mpool_put(fma);return UCS_ERR_NO_RESOURCE;});
/*
 * Helper function for buffer-copy post.
 * Adds the descriptor to the callback queue.
 *
 * Stamps the descriptor with the current send PI so the completion path
 * can match it, posts the WQE (payload lives right after the descriptor,
 * at desc + 1), and on success queues the descriptor on the ep's
 * outstanding list. On failure the descriptor goes back to its pool and
 * the post status is returned; on success @a success is returned
 * (caller-chosen, e.g. a byte count for am_bcopy).
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_bcopy_post(uct_rc_mlx5_ep_t *ep,
                          unsigned opcode, unsigned length,
                          /* SEND */ uint8_t am_id, void *am_hdr, unsigned am_hdr_len,
                          /* RDMA */ uint64_t rdma_raddr, uct_rkey_t rdma_rkey,
                          int force_sig, uct_rc_iface_send_desc_t *desc,
                          ucs_status_t success)
{
    ucs_status_t status;

    desc->super.sn = ep->tx.sw_pi;
    status = uct_rc_mlx5_ep_dptr_post(ep, opcode, desc + 1, length, &desc->lkey,
                                      am_id, am_hdr, am_hdr_len, rdma_raddr, rdma_rkey,
                                      0, 0, 0, force_sig);
    if (status != UCS_OK) {
        ucs_mpool_put(desc);
        return status;
    }

    ucs_queue_push(&ep->super.outstanding, &desc->super.queue);
    return success;
}
/*
 * Completion callback for the RDMA half of a composed unaligned GET.
 *
 * Adds this half's bytes to the head (FMA) descriptor's tally. If the
 * entire transfer is now in, reassembles the unaligned data, fires the
 * user's original completion, and releases the head descriptor that the
 * FMA callback parked. Otherwise marks this RDMA descriptor
 * not-ready-to-free so CQ progress keeps it alive until reassembly.
 */
static void uct_ugni_unalign_rdma_composed_cb(uct_completion_t *self, ucs_status_t status)
{
    uct_ugni_rdma_fetch_desc_t *rdma = (uct_ugni_rdma_fetch_desc_t *)
        ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp);
    uct_ugni_rdma_fetch_desc_t *head_fma = rdma->head;

    head_fma->network_completed_bytes += rdma->super.desc.length;
    /* both halves were set up with identical expected_bytes */
    ucs_assert(head_fma->network_completed_bytes <= rdma->expected_bytes);

    /* Check if messages is completed */
    if (head_fma->network_completed_bytes == head_fma->expected_bytes) {
        assemble_composed_unaligned(head_fma);
        uct_ugni_invoke_orig_comp(head_fma, status);
        /* release the FMA half that completed first */
        head_fma->super.not_ready_to_free = 0;
        ucs_mpool_put(head_fma);
    } else {
        /* FMA half still in flight - keep this descriptor alive */
        rdma->super.not_ready_to_free = 1;
    }
}
/*
 * Poll the iface's local CQ for one completed post and finish it.
 *
 * On a completion: invoke the descriptor's completion callback (if any),
 * decrement the iface/ep outstanding counters, and recycle the descriptor
 * unless it is parked for later reassembly (not_ready_to_free).
 * NOTE(review): this variant invokes the completion without a status
 * argument and has no pending-queue dispatch - it appears to predate the
 * arbiter-based variant; confirm which revision this file tracks.
 */
void uct_ugni_progress(void *arg)
{
    gni_cq_entry_t  event_data = 0;
    gni_post_descriptor_t *event_post_desc_ptr;
    uct_ugni_base_desc_t *desc;
    uct_ugni_iface_t * iface = (uct_ugni_iface_t *)arg;
    gni_return_t ugni_rc;

    ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data);
    if (GNI_RC_NOT_DONE == ugni_rc) {
        return;
    }

    if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) {
        ucs_error("GNI_CqGetEvent falied. Error status %s %d ",
                  gni_err_str[ugni_rc], ugni_rc);
        return;
    }

    ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr);
    /* TRANSACTION_ERROR is tolerated here; the descriptor is still valid */
    if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) {
        ucs_error("GNI_GetCompleted falied. Error status %s %d %d",
                  gni_err_str[ugni_rc], ugni_rc, GNI_RC_TRANSACTION_ERROR);
        return;
    }

    desc = (uct_ugni_base_desc_t *)event_post_desc_ptr;
    ucs_trace_async("Completion received on %p", desc);

    if (NULL != desc->comp_cb) {
        uct_invoke_completion(desc->comp_cb);
    }
    --iface->outstanding;
    --desc->ep->outstanding;

    if (ucs_likely(desc->not_ready_to_free == 0)) {
        ucs_mpool_put(desc);
    }
    return;
}
static UCS_F_ALWAYS_INLINE unsigned uct_cuda_copy_progress_event_queue(ucs_queue_head_t *event_queue, unsigned max_events) { unsigned count = 0; cudaError_t result = cudaSuccess; uct_cuda_copy_event_desc_t *cuda_event; ucs_queue_iter_t iter; ucs_queue_for_each_safe(cuda_event, iter, event_queue, queue) { result = cudaEventQuery(cuda_event->event); if (cudaSuccess != result) { break; } ucs_queue_del_iter(event_queue, iter); if (cuda_event->comp != NULL) { uct_invoke_completion(cuda_event->comp, UCS_OK); } ucs_trace_poll("CUDA Event Done :%p", cuda_event); ucs_mpool_put(cuda_event); count++; if (count >= max_events) { break; } }
/*
 * Common path for sending an active message over SMSG.
 *
 * Assigns a fresh msg_id, records the owning ep on the descriptor, and
 * sends header+payload with GNI_SmsgSendWTag. On success the descriptor is
 * tracked in the iface hash (keyed by msg_id) until the local CQ confirms
 * delivery and the outstanding counters are bumped; on any uGNI failure
 * the descriptor is recycled and UCS_ERR_NO_RESOURCE is reported so the
 * caller can queue the operation as pending.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_ugni_smsg_ep_am_common_send(uct_ugni_smsg_ep_t *ep, uct_ugni_smsg_iface_t *iface,
                                uint8_t am_id, unsigned header_length, void *header,
                                unsigned payload_length, void *payload,
                                uct_ugni_smsg_desc_t *desc)
{
    gni_return_t gni_rc;

    desc->msg_id = iface->smsg_id++;
    desc->ep     = &ep->super;

    gni_rc = GNI_SmsgSendWTag(ep->super.ep, header, header_length,
                              payload, payload_length, desc->msg_id, am_id);
    if (GNI_RC_SUCCESS == gni_rc) {
        /* in flight: account for it and remember it for CQ matching */
        ++ep->super.outstanding;
        ++iface->super.outstanding;
        sglib_hashed_uct_ugni_smsg_desc_t_add(iface->smsg_list, desc);
        return UCS_OK;
    }

    /* send did not go out - recycle the descriptor and report no resources */
    ucs_mpool_put(desc);
    return UCS_ERR_NO_RESOURCE;
}
/*
 * Initialize the SMSG iface: compute per-mailbox buffer size, create the
 * descriptor and mailbox memory pools, grab a descriptor for unexpected
 * receives, activate the iface, and register the progress routine.
 *
 * Fixes over the previous revision:
 *  - GNI_SmsgBufferSizeNeeded's return code is now checked BEFORE
 *    bytes_per_mbox is used (it is undefined on failure);
 *  - self->user_desc is initialized to NULL up front and guarded in the
 *    cleanup path, so an early 'goto clean_desc' no longer puts an
 *    uninitialized pointer;
 *  - cleanup labels are ordered so each failure path unwinds exactly the
 *    resources acquired so far (previously a mbox-pool init failure fell
 *    through into cleaning up the very pool that failed to initialize).
 */
static UCS_CLASS_INIT_FUNC(uct_ugni_smsg_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    gni_return_t ugni_rc;
    unsigned int bytes_per_mbox;
    gni_smsg_attr_t smsg_attr;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_smsg_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.smsg_seg_size       = 2048;
    self->config.rx_headroom         = params->rx_headroom;
    self->config.smsg_max_retransmit = 16;
    self->config.smsg_max_credit     = 8;
    self->smsg_id                    = 0;
    self->user_desc                  = NULL; /* cleanup paths test this */

    smsg_attr.msg_type       = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
    smsg_attr.mbox_maxcredit = self->config.smsg_max_credit;
    smsg_attr.msg_maxsize    = self->config.smsg_seg_size;

    ugni_rc = GNI_SmsgBufferSizeNeeded(&(smsg_attr), &bytes_per_mbox);
    if (ugni_rc != GNI_RC_SUCCESS) {
        /* bytes_per_mbox is undefined on failure - bail before using it */
        ucs_error("Smsg buffer size calculation failed");
        status = UCS_ERR_INVALID_PARAM;
        goto exit;
    }
    self->bytes_per_mbox = ucs_align_up_pow2(bytes_per_mbox, ucs_get_page_size());

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            self->config.smsg_seg_size + sizeof(uct_ugni_smsg_desc_t),
                            0,
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_desc_mpool_ops,
                            "UGNI-SMSG-DESC");
    if (UCS_OK != status) {
        ucs_error("Desc Mpool creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_mbox,
                            0,
                            self->bytes_per_mbox + sizeof(uct_ugni_smsg_mbox_t),
                            sizeof(uct_ugni_smsg_mbox_t),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_mbox_mpool_ops,
                            "UGNI-SMSG-MBOX");
    if (UCS_OK != status) {
        ucs_error("Mbox Mpool creation failed");
        goto clean_desc;
    }

    /* descriptor held ready for unexpected-receive handling */
    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc,
                             self->user_desc, self->user_desc = NULL);

    status = ugni_smsg_activate_iface(self);
    if (UCS_OK != status) {
        ucs_error("Failed to activate the interface");
        goto clean_mbox;
    }

    ugni_rc = GNI_SmsgSetMaxRetrans(self->super.nic_handle,
                                    self->config.smsg_max_retransmit);
    if (ugni_rc != GNI_RC_SUCCESS) {
        ucs_error("Smsg setting max retransmit count failed.");
        status = UCS_ERR_INVALID_PARAM;
        goto clean_iface;
    }

    /* TBD: eventually the uct_ugni_progress has to be moved to
     * udt layer so each ugni layer will have own progress */
    uct_worker_progress_register(worker, uct_ugni_smsg_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

    /* each label unwinds one more acquired resource, falling through */
clean_iface:
    ugni_smsg_deactivate_iface(self);
clean_mbox:
    ucs_mpool_cleanup(&self->free_mbox, 1);
clean_desc:
    if (NULL != self->user_desc) {
        ucs_mpool_put(self->user_desc);
    }
    ucs_mpool_cleanup(&self->free_desc, 1);
exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return status;
}
/*
 * Return a held SMSG AM receive descriptor to its memory pool.
 * @a desc points at the payload, which the pool placed right after the
 * uct_ugni_smsg_desc_t header.
 */
static void uct_ugni_smsg_iface_release_am_desc(uct_iface_t *tl_iface, void *desc)
{
    uct_ugni_smsg_desc_t *ugni_desc;

    /* step back over the header to recover the pool element */
    ugni_desc = (uct_ugni_smsg_desc_t *)desc - 1;
    ucs_mpool_put(ugni_desc);
}
/*
 * GET-zcopy for transfers larger than the FMA segment size: split into an
 * RDMA part (the bulk, from remote_addr) and an FMA part (the tail,
 * starting where the RDMA part ends, rounded up to UGNI_GET_ALIGN).
 *
 * The two descriptors are cross-linked (fma->head / rdma->head) and share
 * the expected-byte count so the two composed completion callbacks can
 * detect when the whole transfer has landed and reassemble the unaligned
 * data into the user buffer. The FMA is posted first; if the RDMA post
 * then fails, the FMA remains in flight and only its post status is
 * surfaced to the caller.
 */
static ucs_status_t uct_ugni_ep_get_composed_fma_rdma(uct_ep_h tl_ep, void *buffer,
                                                      size_t length, uct_mem_h memh,
                                                      uint64_t remote_addr,
                                                      uct_rkey_t rkey,
                                                      uct_completion_t *comp)
{
    uct_ugni_rdma_fetch_desc_t *fma = NULL;
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *rdma = NULL;
    size_t fma_length, rdma_length, aligned_fma_remote_start;
    uint64_t fma_remote_start, rdma_remote_start;
    ucs_status_t post_result;

    /* FMA covers exactly one segment; RDMA covers the rest */
    rdma_length = length - iface->config.fma_seg_size;
    fma_length = iface->config.fma_seg_size;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer,
                             fma, return UCS_ERR_NO_RESOURCE);
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get,
                             rdma, return UCS_ERR_NO_RESOURCE);

    rdma_remote_start = remote_addr;
    fma_remote_start = rdma_remote_start + rdma_length;
    /* FMA GETs must start on an aligned remote address; the RDMA part is
     * stretched by fma->tail bytes to cover the gap */
    aligned_fma_remote_start = ucs_align_up_pow2(fma_remote_start, UGNI_GET_ALIGN);

    uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET, aligned_fma_remote_start,
                            rkey, fma_length, ep, comp,
                            uct_ugni_unalign_fma_composed_cb, NULL, NULL);

    fma->tail = aligned_fma_remote_start - fma_remote_start;

    uct_ugni_format_unaligned_rdma(rdma, GNI_POST_RDMA_GET, buffer, rdma_remote_start,
                                   memh, rkey, rdma_length+fma->tail, ep,
                                   iface->super.local_cq, comp,
                                   uct_ugni_unalign_rdma_composed_cb);

    /* cross-link the halves and mirror the shared bookkeeping */
    fma->head = rdma;
    rdma->head = fma;
    fma->network_completed_bytes = rdma->network_completed_bytes = 0;
    fma->user_buffer = rdma->user_buffer = buffer;
    fma->expected_bytes = rdma->expected_bytes = fma->super.desc.length +
                                                 rdma->super.desc.length;

    ucs_trace_data("Posting split GET ZCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"] and GNI_PostRdma of size %"PRIx64" (%lu)"
                   " from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, length,
                   (void *)fma->super.desc.local_addr,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2,
                   rdma->super.desc.length, length,
                   (void *)rdma->super.desc.local_addr,
                   (void *)rdma->super.desc.remote_addr,
                   rdma->super.desc.remote_mem_hndl.qword1,
                   rdma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length);

    post_result = uct_ugni_post_fma(iface, ep, &(fma->super), UCS_INPROGRESS);
    if(post_result != UCS_OK && post_result != UCS_INPROGRESS){
        /* FMA never went out; only the unused RDMA descriptor is returned */
        ucs_mpool_put(rdma);
        return post_result;
    }

    return uct_ugni_post_rdma(iface, ep, &(rdma->super));
}