/* Send a signal to the remote interface using a Unix-domain datagram socket.
 *
 * @param ep   Shared-memory endpoint whose cached peer sockaddr is used.
 * @param sig  Signal to deliver (e.g. UCT_MM_IFACE_SIGNAL_CONNECT).
 *
 * @return UCS_OK on success, and also when the send failed with
 *         EAGAIN/EWOULDBLOCK (a slow-path retry is scheduled instead, so the
 *         caller releases its lock and can drain incoming Connect messages);
 *         UCS_ERR_IO_ERROR on any other send failure.
 */
static ucs_status_t uct_mm_ep_signal_remote(uct_mm_ep_t *ep,
                                            uct_mm_iface_conn_signal_t sig)
{
    uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                           uct_mm_iface_t);
    ssize_t ret; /* sendto() returns ssize_t; an int could truncate */
    int send_errno;

    /*
     * Send connect message to remote interface
     */
    ret = sendto(iface->signal_fd, &sig, sizeof(sig), 0,
                 (const struct sockaddr*)&ep->cached_signal_sockaddr,
                 ep->cached_signal_addrlen);
    /* Save errno immediately: the logging calls below may clobber it before
     * the failure branches inspect it. (Only meaningful when ret < 0.) */
    send_errno = errno;

    if (ret >= 0) {
        /* A datagram send is all-or-nothing; a short send is a logic error */
        ucs_assert((size_t)ret == sizeof(sig));
        ucs_debug("Sent connect from socket %d to %p", iface->signal_fd,
                  (const struct sockaddr*)&ep->cached_signal_sockaddr);

        /* The signal went through - cancel any previously scheduled retry */
        if (ep->cbq_elem_on) {
            uct_mm_ep_remove_slow_path_callback(iface, ep);
        }

        /* point the ep->fifo_ctl to the remote fifo */
        uct_mm_ep_connected(ep);
        return UCS_OK;
    } else if ((send_errno == EAGAIN) || (send_errno == EWOULDBLOCK)) {
        /* If sending a signal has failed, retry.
         * Note that by default the receiver might have a limited backlog,
         * on Linux systems it is net.unix.max_dgram_qlen (10 by default).
         * POSIX allows EWOULDBLOCK to differ from EAGAIN, so check both.
         */
        ucs_debug("Failed to send connect from socket %d to %p",
                  iface->signal_fd,
                  (const struct sockaddr*)&ep->cached_signal_sockaddr);

        /* If sending the Connect message failed with EAGAIN, try again later.
         * Don't keep trying now in a loop since this may cause a deadlock which
         * prevents the reading of incoming messages which blocks the remote
         * sender. Add the sending attempt as a callback to a slow progress.
         */
        if ((!ep->cbq_elem_on) && (sig == UCT_MM_IFACE_SIGNAL_CONNECT)) {
            ep->cbq_elem.cb = uct_mm_ep_signal_remote_slow_path_callback;
            uct_worker_slowpath_progress_register(iface->super.worker,
                                                  &ep->cbq_elem);
            ep->cbq_elem_on = 1;
        }

        /* Return UCS_OK in this case even though couldn't send, so that the
         * calling flow would release the lock and allow the reading of incoming
         * Connect messages. */
        return UCS_OK;
    } else {
        if (send_errno == ECONNREFUSED) {
            /* Peer's socket is gone - common during teardown, so debug only */
            ucs_debug("failed to send connect signal: connection refused");
        } else {
            ucs_error("failed to send connect signal: %m");
        }
        return UCS_ERR_IO_ERROR;
    }
}
/* Check whether every lane of the flush request has completed. When the
 * outstanding-completion count drops to zero, swap the request's slow-path
 * element for the "flushed" callback that will destroy the endpoint.
 *
 * Returns nonzero iff the flush has fully completed.
 */
static int ucp_flush_check_completion(ucp_request_t *req)
{
    ucp_ep_h ep;

    /* Still waiting on outstanding lane completions? */
    if (req->send.uct_comp.count != 0) {
        return 0;
    }

    ep = req->send.ep;
    ucs_trace("adding slow-path callback to destroy ep %p", ep);

    /* Replace any pending flush slow-path element with the completion one */
    ucp_ep_flush_slow_path_remove(req);
    req->send.flush.cbq_elem_on = 1;
    req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
    uct_worker_slowpath_progress_register(ep->worker->uct,
                                          &req->send.flush.cbq_elem);
    return 1;
}
/* Pending-queue callback: retry flushing the current lane of a flush request.
 *
 * Returns UCS_OK when the request may be removed from the pending queue
 * (flush issued, in progress, or failed-and-reported), or
 * UCS_ERR_NO_RESOURCE to keep the request pending for another retry.
 */
static ucs_status_t ucp_ep_flush_progress_pending(uct_pending_req_t *self)
{
    ucp_request_t    *req  = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_lane_index_t lane  = req->send.lane;
    ucp_ep_h         ep    = req->send.ep;
    ucs_status_t     status;
    int              done;

    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));

    status = uct_ep_flush(ep->uct_eps[lane], 0, &req->send.uct_comp);
    ucs_trace("flushing ep %p lane[%d]: %s", ep, lane,
              ucs_status_string(status));
    if (status == UCS_OK) {
        --req->send.uct_comp.count; /* UCT endpoint is flushed */
    }

    /* since req->flush.pend.lane is still non-NULL, this function will not
     * put anything on pending.
     */
    ucp_ep_flush_progress(req);
    done = ucp_flush_check_completion(req);

    /* If the operation has not completed, add slow-path progress to resume */
    if (!done && req->send.flush.lanes && !req->send.flush.cbq_elem_on) {
        ucs_trace("ep %p: adding slow-path callback to resume flush", ep);
        req->send.flush.cbq_elem.cb = ucp_ep_flush_resume_slow_path_callback;
        req->send.flush.cbq_elem_on = 1;
        uct_worker_slowpath_progress_register(ep->worker->uct,
                                              &req->send.flush.cbq_elem);
    }

    switch (status) {
    case UCS_OK:
    case UCS_INPROGRESS:
        /* Lane flush issued - detach the request from this lane */
        req->send.lane = UCP_NULL_LANE;
        return UCS_OK;
    case UCS_ERR_NO_RESOURCE:
        /* Transport is busy - stay on the pending queue and retry later */
        return UCS_ERR_NO_RESOURCE;
    default:
        ucp_ep_flush_error(req, status);
        return UCS_OK;
    }
}