static UCS_CLASS_INIT_FUNC(uct_self_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    ucs_status_t status;
    uct_self_iface_config_t *self_config = NULL;

    ucs_trace_func("Creating a loop-back transport self=%p rxh=%lu",
                   self, params->rx_headroom);

    if (strcmp(params->dev_name, UCT_SELF_NAME) != 0) {
        ucs_error("No device was found: %s", params->dev_name);
        return UCS_ERR_NO_DEVICE;
    }

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_self_iface_ops, md, worker,
                              tl_config UCS_STATS_ARG(params->stats_root)
                              UCS_STATS_ARG(UCT_SELF_NAME));

    self_config = ucs_derived_of(tl_config, uct_self_iface_config_t);

    self->id              = ucs_generate_uuid((uintptr_t)self);
    self->rx_headroom     = params->rx_headroom;
    self->data_length     = self_config->super.max_bcopy;
    self->release_desc.cb = uct_self_iface_release_desc;

    /* create a memory pool for data transferred */
    status = uct_iface_mpool_init(&self->super, &self->msg_desc_mp,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom +
                                      self->data_length,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom,
                                  UCS_SYS_CACHE_LINE_SIZE,
                                  &self_config->mp,
                                  256,
                                  ucs_empty_function,
                                  "self_msg_desc");
    if (UCS_OK != status) {
        ucs_error("Failed to create a memory pool for the loop-back transport");
        goto err;
    }

    /* set the message descriptor for the loop-back */
    self->msg_cur_desc = ucs_mpool_get(&self->msg_desc_mp);
    VALGRIND_MAKE_MEM_DEFINED(self->msg_cur_desc, sizeof(*(self->msg_cur_desc)));
    if (NULL == self->msg_cur_desc) {
        ucs_error("Failed to get the first descriptor in loop-back MP storage");
        status = UCS_ERR_NO_RESOURCE;
        goto destroy_mpool;
    }

    ucs_debug("Created a loop-back iface. id=0x%lx, desc=%p, len=%u, tx_hdr=%lu",
              self->id, self->msg_cur_desc, self->data_length, self->rx_headroom);
    return UCS_OK;

destroy_mpool:
    ucs_mpool_cleanup(&self->msg_desc_mp, 1);
err:
    return status;
}
static ucs_status_ptr_t ucp_disconnect_nb_internal(ucp_ep_h ep)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_debug("disconnect ep %p", ep);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    /*
     * Flush operation can be queued on the pending queue of only one of the
     * lanes (indicated by req->send.lane) and scheduled for completion on any
     * number of lanes. req->send.uct_comp.count keeps track of how many lanes
     * are not flushed yet, and when it reaches zero, it means all lanes are
     * flushed. req->send.flush.lanes keeps track of which lanes we still have
     * to start flush on.
     * If a flush is completed from a pending/completion callback, we need to
     * schedule a slow-path callback to release the endpoint later, since a UCT
     * endpoint cannot be released from pending/completion callback context.
     */
    req->flags                  = 0;
    req->status                 = UCS_OK;
    req->send.ep                = ep;
    req->send.flush.flushed_cb  = ucp_ep_disconnected;
    req->send.flush.lanes       = UCS_MASK(ucp_ep_num_lanes(ep));
    req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
    req->send.flush.cbq_elem_on = 0;
    req->send.lane              = UCP_NULL_LANE;
    req->send.uct.func          = ucp_ep_flush_progress_pending;
    req->send.uct_comp.func     = ucp_ep_flush_completion;
    req->send.uct_comp.count    = ucp_ep_num_lanes(ep);

    ucp_ep_flush_progress(req);

    if (req->send.uct_comp.count == 0) {
        status = req->status;
        ucp_ep_disconnected(req);
        ucs_trace_req("ep %p: releasing flush request %p, returning status %s",
                      ep, req, ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("ep %p: return inprogress flush request %p (%p)", ep, req,
                  req + 1);
    return req + 1;
}
/*
 * @param [in] rsc_tli  Resource index for every lane.
 */
static ucs_status_t ucp_wireup_msg_send(ucp_ep_h ep, uint8_t type,
                                        uint64_t tl_bitmap,
                                        const ucp_rsc_index_t *rsc_tli)
{
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane;
    unsigned order[UCP_MAX_LANES + 1];
    ucp_request_t* req;
    ucs_status_t status;
    void *address;

    ucs_assert(ep->cfg_index != (uint8_t)-1);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    req->flags            = UCP_REQUEST_FLAG_RELEASED;
    req->send.ep          = ep;
    req->send.cb          = ucp_wireup_msg_send_completion;
    req->send.wireup.type = type;
    req->send.uct.func    = ucp_wireup_msg_progress;

    /* pack all addresses */
    status = ucp_address_pack(ep->worker, ep, tl_bitmap, order,
                              &req->send.length, &address);
    if (status != UCS_OK) {
        ucs_error("failed to pack address: %s", ucs_status_string(status));
        return status;
    }

    req->send.buffer = address;

    /* send the indices of the addresses that the remote side should connect to */
    for (lane = 0; lane < UCP_MAX_LANES; ++lane) {
        rsc_index = rsc_tli[lane];
        if (rsc_index != UCP_NULL_RESOURCE) {
            req->send.wireup.tli[lane] = ucp_wireup_address_index(order,
                                                                  tl_bitmap,
                                                                  rsc_index);
        } else {
            req->send.wireup.tli[lane] = -1;
        }
    }

    ucp_request_start_send(req);
    return UCS_OK;
}
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s", buffer,
                  count, tag, ucp_ep_peer_name(ep));

    status = ucp_tag_send_try(ep, buffer, count, datatype, tag);
    if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
        return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
    }

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    VALGRIND_MAKE_MEM_DEFINED(req + 1, ep->worker->context->config.request.size);

    req->flags   = 0;
    req->cb.send = cb;

    status = ucp_tag_send_start_req(ep, buffer, count, datatype, tag, req);
    if (status != UCS_OK) {
        return UCS_STATUS_PTR(status); /* only error statuses reach here */
    }

    if (!(req->flags & UCP_REQUEST_FLAG_COMPLETED)) {
        ucp_ep_add_pending(ep, ep->uct_ep, req);
        ucp_worker_progress(ep->worker);
    }

    ucs_trace_req("send_nb returning request %p", req);
    return req + 1;
}
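/*
 * Illustration only, not part of the sources above: a minimal sketch of how a
 * caller typically interprets the three possible returns of ucp_tag_send_nb()
 * (NULL for immediate completion, an error encoded as a pointer, or a request
 * handle at req + 1). The names blocking_tag_send, send_done_cb, worker, ep,
 * buf and len are hypothetical, and the polling/release calls assume a UCX
 * version that provides ucp_request_check_status() and ucp_request_free().
 */
#include <ucp/api/ucp.h>

static void send_done_cb(void *request, ucs_status_t status)
{
    /* invoked once the send completes asynchronously */
}

static ucs_status_t blocking_tag_send(ucp_worker_h worker, ucp_ep_h ep,
                                      const void *buf, size_t len, ucp_tag_t tag)
{
    void *req = ucp_tag_send_nb(ep, buf, len, ucp_dt_make_contig(1), tag,
                                send_done_cb);

    if (req == NULL) {
        return UCS_OK;                /* completed immediately */
    } else if (UCS_PTR_IS_ERR(req)) {
        return UCS_PTR_STATUS(req);   /* failed to start */
    }

    /* progress the worker until the request completes, then release it */
    while (ucp_request_check_status(req) == UCS_INPROGRESS) {
        ucp_worker_progress(worker);
    }
    ucp_request_free(req);
    return UCS_OK;
}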
ucs_status_t uct_rc_verbs_iface_common_init(uct_rc_verbs_iface_common_t *iface,
                                            uct_rc_iface_t *rc_iface,
                                            uct_rc_verbs_iface_common_config_t *config,
                                            uct_rc_iface_config_t *rc_config,
                                            size_t max_hdr_size)
{
    ucs_status_t status;

    memset(iface->inl_sge, 0, sizeof(iface->inl_sge));

    /* Configuration */
    iface->config.short_desc_size = ucs_max(UCT_RC_MAX_ATOMIC_SIZE, max_hdr_size);

    /* Create AM headers and Atomic mempool */
    status = uct_iface_mpool_init(&rc_iface->super.super, &iface->short_desc_mp,
                                  sizeof(uct_rc_iface_send_desc_t) +
                                      iface->config.short_desc_size,
                                  sizeof(uct_rc_iface_send_desc_t),
                                  UCS_SYS_CACHE_LINE_SIZE,
                                  &rc_config->super.tx.mp,
                                  rc_iface->config.tx_qp_len,
                                  uct_rc_iface_send_desc_init,
                                  "rc_verbs_short_desc");
    if (status != UCS_OK) {
        return status;
    }

    iface->config.notag_hdr_size = 0;

    iface->am_inl_hdr = ucs_mpool_get(&iface->short_desc_mp);
    if (iface->am_inl_hdr == NULL) {
        ucs_error("Failed to allocate AM short header");
        return UCS_ERR_NO_MEMORY;
    }

    return UCS_OK;
}
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src,
                                   size_t length, int direction,
                                   cudaStream_t stream,
                                   ucs_queue_head_t *outstanding_queue,
                                   uct_completion_t *comp)
{
    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_cuda_copy_iface_t);
    uct_cuda_copy_event_desc_t *cuda_event;
    ucs_status_t status;

    if (!length) {
        return UCS_OK;
    }

    cuda_event = ucs_mpool_get(&iface->cuda_event_desc);
    if (ucs_unlikely(cuda_event == NULL)) {
        ucs_error("Failed to allocate cuda event object");
        return UCS_ERR_NO_MEMORY;
    }

    status = UCT_CUDA_FUNC(cudaMemcpyAsync(dst, src, length, direction, stream));
    if (UCS_OK != status) {
        return UCS_ERR_IO_ERROR;
    }

    status = UCT_CUDA_FUNC(cudaEventRecord(cuda_event->event, stream));
    if (UCS_OK != status) {
        return UCS_ERR_IO_ERROR;
    }

    ucs_queue_push(outstanding_queue, &cuda_event->queue);
    cuda_event->comp = comp;

    ucs_trace("cuda async issued :%p dst:%p, src:%p len:%ld",
              cuda_event, dst, src, length);
    return UCS_INPROGRESS;
}
void *ucs_mpool_get_grow(ucs_mpool_t *mp)
{
    size_t chunk_size, chunk_padding;
    ucs_mpool_data_t *data = mp->data;
    ucs_mpool_chunk_t *chunk;
    ucs_mpool_elem_t *elem;
    ucs_status_t status;
    unsigned i;
    void *ptr;

    if (data->quota == 0) {
        return NULL;
    }

    chunk_size = data->chunk_size;
    status = data->ops->chunk_alloc(mp, &chunk_size, &ptr);
    if (status != UCS_OK) {
        ucs_error("Failed to allocate memory pool chunk: %s",
                  ucs_status_string(status));
        return NULL;
    }

    /* Calculate padding, and update element count according to allocated size */
    chunk            = ptr;
    chunk_padding    = ucs_padding((uintptr_t)(chunk + 1) + data->align_offset,
                                   data->alignment);
    chunk->elems     = (void*)(chunk + 1) + chunk_padding;
    chunk->num_elems = (chunk_size - chunk_padding - sizeof(*chunk)) /
                       ucs_mpool_elem_total_size(data);

    ucs_debug("mpool %s: allocated chunk %p of %lu bytes with %u elements",
              ucs_mpool_name(mp), chunk, chunk_size, chunk->num_elems);

    for (i = 0; i < chunk->num_elems; ++i) {
        elem = ucs_mpool_chunk_elem(data, chunk, i);
        if (data->ops->obj_init != NULL) {
            data->ops->obj_init(mp, elem + 1, chunk);
        }

        ucs_mpool_add_to_freelist(mp, elem, 0);
        if (mp->data->tail == NULL) {
            mp->data->tail = elem;
        }
    }

    chunk->next  = data->chunks;
    data->chunks = chunk;

    if (data->quota == UINT_MAX) {
        /* Infinite memory pool */
    } else if (data->quota >= chunk->num_elems) {
        data->quota -= chunk->num_elems;
    } else {
        data->quota = 0;
    }

    VALGRIND_MAKE_MEM_NOACCESS(chunk + 1, chunk_size - sizeof(*chunk));

    ucs_assert(mp->freelist != NULL);

    /* Should not recurse */
    return ucs_mpool_get(mp);
}
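/*
 * Illustration only, not part of the sources above: a minimal, standalone
 * sketch of the padding arithmetic that ucs_mpool_get_grow() performs when it
 * places the element array after the chunk header, assuming ucs_padding(n, a)
 * returns the number of bytes needed to round n up to a multiple of a. The
 * helper pad_to_alignment and the constants below are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

static size_t pad_to_alignment(uintptr_t n, size_t alignment)
{
    return (alignment - (n % alignment)) % alignment;
}

int main(void)
{
    uintptr_t base      = 0x1008; /* address right after the chunk header */
    size_t align_offset = 8;      /* offset inside each element to align  */
    size_t alignment    = 64;     /* e.g. a cache-line size               */
    size_t padding      = pad_to_alignment(base + align_offset, alignment);

    /* elements start at base + padding, so (base + padding + align_offset)
     * lands on a multiple of the requested alignment */
    printf("padding = %zu, first aligned point = 0x%lx\n",
           padding, (unsigned long)(base + padding + align_offset));
    return 0;
}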
static uct_ugni_flush_group_t *uct_ugni_new_flush_group(uct_ugni_iface_t *iface)
{
    return ucs_mpool_get(&iface->flush_pool);
}
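/*
 * Illustration only, not part of the sources above: a minimal sketch of the
 * usual get/check/put pattern around ucs_mpool_get(), assuming a pool that
 * was initialized elsewhere. uct_ugni_new_flush_group() above returns the
 * element without a NULL check, so its callers must handle a NULL result when
 * the pool is exhausted. my_obj_t, my_obj_get and my_obj_put are hypothetical.
 */
#include <ucs/datastruct/mpool.h>

typedef struct {
    int value;
} my_obj_t;

static my_obj_t *my_obj_get(ucs_mpool_t *pool)
{
    my_obj_t *obj = ucs_mpool_get(pool);
    if (obj == NULL) {
        return NULL; /* pool exhausted or chunk allocation failed */
    }
    return obj;
}

static void my_obj_put(my_obj_t *obj)
{
    /* return the element to the pool it was taken from */
    ucs_mpool_put(obj);
}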