/*
 * Destructor of the CM interface.
 *
 * Teardown order:
 *   1. detach the async handler that was registered on the CM device fd;
 *   2. between uct_cm_enter()/uct_cm_leave(), destroy every id still held in
 *      the outstanding table, then the listening id, then close the device;
 *   3. release the outstanding table itself.
 */
static UCS_CLASS_CLEANUP_FUNC(uct_cm_iface_t)
{
    ucs_trace_func("");

    /* Unhook the fd from the async context before touching the CM objects */
    ucs_async_unset_event_handler(self->cmdev->fd);

    uct_cm_enter(self);

    /* Drain ids of requests which were sent but never completed */
    while (self->num_outstanding > 0) {
        ib_cm_destroy_id(uct_cm_iface_outstanding_pop(self));
    }

    ib_cm_destroy_id(self->listen_id);
    ib_cm_close_device(self->cmdev);

    uct_cm_leave(self);

    ucs_free(self->outstanding);
}
static UCS_CLASS_CLEANUP_FUNC(uct_cm_iface_t) { ucs_trace_func(""); ucs_async_unset_event_handler(self->cmdev->fd); uct_cm_enter(self); while (self->num_outstanding > 0) { ib_cm_destroy_id(uct_cm_iface_outstanding_pop(self)); } ib_cm_destroy_id(self->listen_id); ib_cm_close_device(self->cmdev); uct_cm_leave(self); /* At this point all outstanding have been removed, and no further events * can be added. */ ucs_callbackq_remove_all(&uct_cm_iface_worker(self)->progress_q, uct_cm_iface_progress, self); ucs_free(self->outstanding); }
/*
 * Send an active message as the private data of a CM SIDR request.
 *
 * The payload is produced by pack_cb() into a temporary buffer of
 * IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE bytes, right after a uct_cm_hdr_t header.
 * A fresh CM id is created per message and recorded in the interface's
 * outstanding table.
 *
 * @param tl_ep    Endpoint to send on.
 * @param am_id    Active message id; validated by UCT_CHECK_AM_ID().
 * @param pack_cb  Callback which writes the payload and returns its length.
 * @param arg      Opaque argument forwarded to pack_cb.
 *
 * @return Packed payload length on success, otherwise a ucs_status_t error:
 *         UCS_ERR_NO_RESOURCE when the outstanding table is full,
 *         UCS_ERR_NO_MEMORY when the temporary buffer cannot be allocated,
 *         UCS_ERR_IO_ERROR when creating the id or sending the request fails,
 *         or whatever uct_cm_ep_fill_path_rec() reports.
 */
ssize_t uct_cm_ep_am_bcopy(uct_ep_h tl_ep, uint8_t am_id,
                           uct_pack_callback_t pack_cb, void *arg)
{
    uct_cm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cm_iface_t);
    uct_cm_ep_t *ep       = ucs_derived_of(tl_ep, uct_cm_ep_t);
    struct ib_cm_sidr_req_param sidr_req;
    struct ibv_sa_path_rec path_rec;
    struct ib_cm_id *cm_id;
    uct_cm_hdr_t *msg;
    ucs_status_t status;
    size_t length;
    int rc;

    UCT_CHECK_AM_ID(am_id);

    uct_cm_enter(iface);

    /* Refuse to send when every outstanding slot is already taken */
    if (iface->num_outstanding >= iface->config.max_outstanding) {
        status = UCS_ERR_NO_RESOURCE;
        goto err;
    }

    /* Temporary contiguous buffer: header followed by the packed payload */
    msg = ucs_malloc(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, "cm_send_buf");
    if (msg == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    length      = pack_cb(msg + 1, arg);
    msg->am_id  = am_id;
    msg->length = length;

    status = uct_cm_ep_fill_path_rec(ep, &path_rec);
    if (status != UCS_OK) {
        goto err_free;
    }

    /* Build the SIDR request descriptor */
    memset(&sidr_req, 0, sizeof(sidr_req));
    sidr_req.path             = &path_rec;
    sidr_req.service_id       = ep->dest_addr.id;
    sidr_req.timeout_ms       = iface->config.timeout_ms;
    sidr_req.private_data     = msg;
    sidr_req.private_data_len = sizeof(*msg) + length;
    sidr_req.max_cm_retries   = iface->config.retry_count;

    /* One-shot id for this message; it is released when the REP arrives */
    rc = ib_cm_create_id(iface->cmdev, &cm_id, NULL);
    if (rc) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free;
    }

    uct_cm_dump_path(&path_rec);

    rc = ib_cm_send_sidr_req(cm_id, &sidr_req);
    if (rc) {
        ucs_error("ib_cm_send_sidr_req() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_id;
    }

    /* Track the id until its completion event shows up */
    iface->outstanding[iface->num_outstanding++] = cm_id;
    UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, length);
    uct_cm_leave(iface);

    uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_SEND, msg,
                            "TX: SIDR_REQ [dlid %d svc 0x%"PRIx64"]",
                            ntohs(path_rec.dlid), sidr_req.service_id);
    ucs_free(msg);
    return length;

err_destroy_id:
    ib_cm_destroy_id(cm_id);
err_free:
    ucs_free(msg);
err:
    uct_cm_leave(iface);
    return status;
}
/*
 * Constructor of the CM interface.
 *
 * Initializes the uct_ib_iface_t base, copies the CM settings from the
 * configuration, opens the CM device in non-blocking mode, starts listening
 * on a generated SIDR service id and registers uct_cm_iface_event_handler()
 * on the CM device fd.
 *
 * @param pd           Protection domain, forwarded to the base class.
 * @param worker       Worker; must have a non-NULL async context.
 * @param dev_name     Device name, forwarded to the base class.
 * @param rx_headroom  Receive headroom, forwarded to the base class.
 * @param tl_config    Transport configuration (uct_cm_iface_config_t).
 *
 * @return UCS_OK on success. Otherwise:
 *         UCS_ERR_INVALID_PARAM if worker->async is NULL,
 *         UCS_ERR_NO_MEMORY     if the outstanding table cannot be allocated,
 *         UCS_ERR_NO_DEVICE     if the CM device or the listen id cannot be
 *                               created,
 *         UCS_ERR_INVALID_ADDR  if ib_cm_listen() fails,
 *         or the status returned by ucs_sys_fcntl_modfl() /
 *         ucs_async_set_event_handler().
 */
static UCS_CLASS_INIT_FUNC(uct_cm_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_cm_iface_config_t *config = ucs_derived_of(tl_config,
                                                   uct_cm_iface_config_t);
    ucs_status_t status;
    int ret;

    ucs_trace_func("");

    UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &uct_cm_iface_ops, pd, worker,
                              dev_name, rx_headroom,
                              0 /* rx_priv_len */,
                              0 /* rx_hdr_len */,
                              1 /* tx_cq_len */,
                              IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, /* mss */
                              &config->super);

    /* CM events are delivered through the async context only */
    if (worker->async == NULL) {
        ucs_error("cm must have async!=NULL");
        return UCS_ERR_INVALID_PARAM;
    }

    /* Service id: generated uuid with the IB_CM_ASSIGN_SERVICE_ID_MASK bits
     * cleared, truncated to 32 bits */
    self->service_id = (uint32_t)(ucs_generate_uuid((uintptr_t)self) &
                                  (~IB_CM_ASSIGN_SERVICE_ID_MASK));
    self->num_outstanding = 0;

    /* Scale by 1e3 and round to nearest (presumably seconds -> milliseconds;
     * confirm the unit of config->timeout) */
    self->config.timeout_ms      = (int)(config->timeout * 1e3 + 0.5);
    self->config.max_outstanding = config->max_outstanding;
    /* Clamp to UINT8_MAX */
    self->config.retry_count     = ucs_min(config->retry_count, UINT8_MAX);

    self->notify_q.head = NULL;
    ucs_queue_head_init(&self->notify_q);

    self->outstanding = ucs_calloc(self->config.max_outstanding,
                                   sizeof(*self->outstanding),
                                   "cm_outstanding");
    if (self->outstanding == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    self->cmdev = ib_cm_open_device(uct_ib_iface_device(&self->super)->ibv_context);
    if (self->cmdev == NULL) {
        /* The message is kept in one literal; a raw line break inside a
         * string literal is not valid C, so the break is spelled as \n */
        ucs_error("ib_cm_open_device() failed: %m. \nCheck if ib_ucm.ko module is loaded.");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_outstanding;
    }

    /* Non-blocking fd, so the event handler can drain events until EAGAIN */
    status = ucs_sys_fcntl_modfl(self->cmdev->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_close_device;
    }

    ret = ib_cm_create_id(self->cmdev, &self->listen_id, self);
    if (ret) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_NO_DEVICE;
        goto err_close_device;
    }

    ret = ib_cm_listen(self->listen_id, self->service_id, 0);
    if (ret) {
        ucs_error("ib_cm_listen() failed: %m");
        status = UCS_ERR_INVALID_ADDR;
        goto err_destroy_id;
    }

    if (config->async_mode == UCS_ASYNC_MODE_SIGNAL) {
        ucs_warn("ib_cm fd does not support SIGIO");
    }

    status = ucs_async_set_event_handler(config->async_mode, self->cmdev->fd,
                                         POLLIN, uct_cm_iface_event_handler,
                                         self, worker->async);
    if (status != UCS_OK) {
        ucs_error("failed to set event handler");
        goto err_destroy_id;
    }

    ucs_debug("listening for SIDR service_id 0x%x on fd %d", self->service_id,
              self->cmdev->fd);
    return UCS_OK;

    /* Unwind in reverse order of acquisition */
err_destroy_id:
    ib_cm_destroy_id(self->listen_id);
err_close_device:
    ib_cm_close_device(self->cmdev);
err_free_outstanding:
    ucs_free(self->outstanding);
err:
    return status;
}
static void uct_cm_iface_event_handler(void *arg) { uct_cm_iface_t *iface = arg; struct ib_cm_event *event; struct ib_cm_id *id; int destroy_id; int ret; ucs_trace_func(""); for (;;) { /* Fetch all events */ ret = ib_cm_get_event(iface->cmdev, &event); if (ret) { if (errno != EAGAIN) { ucs_warn("ib_cm_get_event() failed: %m"); } return; } id = event->cm_id; /* Handle the event */ switch (event->event) { case IB_CM_SIDR_REQ_ERROR: ucs_error("SIDR request error, status: %s", ibv_wc_status_str(event->param.send_status)); destroy_id = 1; break; case IB_CM_SIDR_REQ_RECEIVED: uct_cm_iface_handle_sidr_req(iface, event); destroy_id = 1; /* Destroy the ID created by the driver */ break; case IB_CM_SIDR_REP_RECEIVED: ucs_trace_data("RX: SIDR_REP [id %p{%u}]", id, id->handle); uct_cm_iface_outstanding_remove(iface, id); destroy_id = 1; /* Destroy the ID which was used for sending */ break; default: ucs_warn("Unexpected CM event: %d", event->event); destroy_id = 0; break; } /* Acknowledge CM event, remember the id, in case we would destroy it */ ret = ib_cm_ack_event(event); if (ret) { ucs_warn("ib_cm_ack_event() failed: %m"); } /* If there is an id which should be destroyed, do it now, after * acknowledging all events. */ if (destroy_id) { ret = ib_cm_destroy_id(id); if (ret) { ucs_error("ib_cm_destroy_id() failed: %m"); } } uct_cm_iface_notify(iface); } }