/*
 * Close a proxy connection.
 *
 * The connection's unref hook is invoked and the connection is passed to
 * conn_put() in every case. When the socket descriptor is still valid it is
 * closed as well; a failing close() is logged and otherwise ignored.
 *
 * ctx is not referenced by this function.
 */
void
proxy_close(struct context *ctx, struct conn *conn)
{
    ASSERT(!conn->client && conn->proxy);

    if (conn->sd >= 0) {
        /* no message state is expected on this connection at close time */
        ASSERT(conn->rmsg == NULL);
        ASSERT(conn->smsg == NULL);
        ASSERT(TAILQ_EMPTY(&conn->imsg_q));
        ASSERT(TAILQ_EMPTY(&conn->omsg_q));

        conn->unref(conn);

        if (close(conn->sd) < 0) {
            log_error("close p %d failed, ignored: %s", conn->sd,
                      strerror(errno));
        }
        conn->sd = -1;
    } else {
        /* descriptor already gone: only detach */
        conn->unref(conn);
    }

    conn_put(conn);
}
void accept_cb(struct wx_worker_s* wk) { struct conn_s* conn = conn_get(); if (conn == NULL) { wx_err("no more free connction"); return; } int cfd = wx_accept(wk->listen_fd, NULL, 0); if (cfd < 0) { conn_put(conn); return; } int p = fcntl(cfd, F_GETFL); if (-1 == p || -1 == fcntl(cfd, F_SETFL, p|O_NONBLOCK)) { wx_err("fcntl"); conn_put(conn); return; } int one = 1; setsockopt(cfd, SOL_TCP, TCP_NODELAY, &one, sizeof(one)); conn->buf = (struct wx_buf_s*)conn->data; conn->buf->base = conn->buf->data; conn->buf->size = sizeof(conn->data) - sizeof(struct wx_buf_s); wx_conn_read_start(&conn->wx_conn, cfd); }
int isert_pdu_sent(struct iscsi_cmnd *pdu) { struct iscsi_conn *conn = pdu->conn; int res = 0; TRACE_ENTRY(); if (unlikely(pdu->should_close_conn)) { if (pdu->should_close_all_conn) { struct iscsi_target *target = pdu->conn->session->target; PRINT_INFO("Closing all connections for target %x at " "initiator's %s request", target->tid, conn->session->initiator_name); mutex_lock(&target->target_mutex); target_del_all_sess(target, 0); mutex_unlock(&target->target_mutex); } else { PRINT_INFO("Closing connection %p at initiator's %s " "request", conn, conn->session->initiator_name); mark_conn_closed(conn); } } /* we may get NULL parent req for login response */ if (likely(pdu->parent_req)) { rsp_cmnd_release(pdu); conn_put(conn); } TRACE_EXIT_RES(res); return res; }
/* * Worker thread new connection event loop * * Processes an incoming "handle a new connection" item. This is called when * input arrives on the libevent wakeup pipe. Each libevent instance has a * wakeup pipe, which other threads (dispatcher thread) uses to signal that * they've put a new connection on its queue. */ static void thread_libevent_process(int fd, short which, void *arg) { struct thread_worker *t = arg; char buf[1]; ssize_t n; struct conn *c; int status; n = read(fd, buf, 1); if (n < 0) { log_warn("read from notify pipe %d failed: %s", fd, strerror(errno)); } c = conn_cq_pop(&t->new_cq); if (c == NULL) { return; } c->thread = t; status = conn_set_event(c, t->base); if (status != MC_OK) { close(c->sd); conn_put(c); } }
/*
 * Handle a connection-rejected CM event for `conn`.
 *
 * When the event status is 28 (consumer reject: the remote side called
 * rdma_reject, so a payload is present) and the carried reason is
 * REJECT_REASON_CONNECTED or REJECT_REASON_CONNECTING, both sides tried to
 * connect simultaneously and nothing needs to be done.
 *
 * In every other case the connection is moved to CONN_STATE_DISCONNECTED,
 * waiters on move_wait are woken, the QP is destroyed and conn_put() is
 * called after the mutex has been released.
 */
static void process_connect_reject(struct rdma_cm_event *event, conn_t *conn)
{
	pthread_mutex_lock(&conn->mutex);

	if (event->status == 28) {
		const struct cm_priv_reject *payload =
			event->param.conn.private_data;
		const int crossed =
			payload->reason == REJECT_REASON_CONNECTED ||
			payload->reason == REJECT_REASON_CONNECTING;

		if (crossed) {
			/* Simultaneous connect from both ends: that is fine. */
			pthread_mutex_unlock(&conn->mutex);
			return;
		}
	}

	/* Unexpected rejection: tear the connection attempt down. */
	conn->state = CONN_STATE_DISCONNECTED;
	pthread_cond_broadcast(&conn->move_wait);
	rdma_destroy_qp(conn->rdma.cm_id);

	pthread_mutex_unlock(&conn->mutex);

	conn_put(conn);
}
/* send then recv 1 message with rank */ static int send_recv_msg(struct message *msg, int rank) { int ret = -1, i = 2; char port[HOST_NAME_MAX]; struct sockconn conn; memset(&conn, 0, sizeof(conn)); BUG(!msg); BUG(rank > node_file_entries - 1); snprintf(port, HOST_NAME_MAX, "%d", node_file[rank].ocm_port); if (conn_connect(&conn, node_file[rank].ip_eth, port)) goto out; while (i-- > 0) { switch (i) { case 1: ret = conn_put(&conn, msg, sizeof(*msg)); break; case 0: ret = conn_get(&conn, msg, sizeof(*msg)); break; } if (--ret < 0) /* 0 means remote closed; turn into error condition */ break; } if (ret < 0 || (ret = conn_close(&conn))) goto out; ret = 0; out: /* TODO close connection on error */ return ret; }
/**
 * @brief Initialize shared memory resources.
 *
 * Resets the shmem fields of the NI, fills in the physical nid/pid of the
 * interface and of the NI where they are still wildcards and, for physical
 * NIs, sets up the comm pad and initiates the shmem connection to ourselves.
 *
 * @param[in] gbl not referenced by this function
 * @param[in] ni  the NI to initialize
 *
 * @return PTL_OK on success, the setup_commpad() error code if the comm pad
 *         cannot be set up, or PTL_ARG_INVALID if no connection to self can
 *         be obtained.
 */
static int PtlNIInit_shmem(gbl_t *gbl, ni_t *ni)
{
	conn_t *self;
	int rc;

	ni->shmem.knem_fd = -1;
	ni->shmem.comm_pad = MAP_FAILED;

	/* Only if IB hasn't setup the NID first. */
	if (ni->iface->id.phys.nid == PTL_NID_ANY)
		ni->iface->id.phys.nid = 0;

	if (ni->iface->id.phys.pid == PTL_PID_ANY)
		ni->iface->id.phys.pid = getpid();

	ni->id.phys.nid = ni->iface->id.phys.nid;

	if (ni->id.phys.pid == PTL_PID_ANY)
		ni->id.phys.pid = ni->iface->id.phys.pid;

	ptl_info("SharedMEM nid : %i pid: %i \n", ni->id.phys.nid,
		 ni->id.phys.pid);

	/* Everything below applies to physical NIs only. */
	if (!(ni->options & PTL_NI_PHYSICAL))
		return PTL_OK;

	/* Used later to setup the buffers. */
	ni->mem.index = 0;
	ni->mem.node_size = 1;

	rc = setup_commpad(ni);
	if (unlikely(rc)) {
		WARN();
		return rc;
	}

	/* Physical interface. We are connected to ourselves. */
	self = get_conn(ni, ni->id);
	if (!self) {
		/* It's hard to recover from here. */
		WARN();
		return PTL_ARG_INVALID;
	}

	self->transport = transport_shmem;

	/* For physical addressing we need to make a connection. */
	shmem_init_connect(ni, self);

	conn_put(self);		/* from get_conn */

	return PTL_OK;
}
rstatus_t conn_close(struct conn *conn) { rstatus_t status; struct mbuf *mbuf, *nbuf; /* current and next mbuf */ if (conn->fd < 0) { conn_put(conn); return NC_OK; } if (!STAILQ_EMPTY(&conn->recv_queue)) { log_warn("close conn %d discard data in send_queue", conn->fd); for (mbuf = STAILQ_FIRST(&conn->recv_queue); mbuf != NULL; mbuf = nbuf) { nbuf = STAILQ_NEXT(mbuf, next); mbuf_remove(&conn->recv_queue, mbuf); mbuf_put(mbuf); } } if (!STAILQ_EMPTY(&conn->send_queue)) { log_warn("close conn %d discard data in send_queue", conn->fd); for (mbuf = STAILQ_FIRST(&conn->send_queue); mbuf != NULL; mbuf = nbuf) { nbuf = STAILQ_NEXT(mbuf, next); mbuf_remove(&conn->send_queue, mbuf); mbuf_put(mbuf); } } status = close(conn->fd); if (status < 0) { log_error("close c %d failed, ignored: %s", conn->fd, strerror(errno)); } conn->fd = -1; conn_put(conn); return NC_OK; }
/*
 * Clean up one connection.
 *
 * `data` is the conn_t to clean up. With WITH_TRANSPORT_IB enabled, an RDMA
 * connection is expected to be in CONN_STATE_DISCONNECTED and its CM id, if
 * any, is destroyed and cleared. conn_put() is then called on the
 * connection.
 */
static void destroy_conn(void *data)
{
	conn_t *victim = data;

#if WITH_TRANSPORT_IB
	if (victim->transport.type == CONN_TYPE_RDMA) {
		assert(victim->state == CONN_STATE_DISCONNECTED);

		if (victim->rdma.cm_id != NULL) {
			rdma_destroy_id(victim->rdma.cm_id);
			victim->rdma.cm_id = NULL;
		}
	}
#endif

	conn_put(victim);
}
/*
 * Close connection `c`: remove its event, close its socket, run
 * conn_cleanup() and conn_put() on it, and decrement the conn_curr thread
 * statistic.
 */
void
conn_close(struct conn *c)
{
    /* stop event delivery for this connection before the socket goes away */
    event_del(&c->event);

    log_debug(LOG_VVERB, "<%d connection closed", c->sd);

    close(c->sd);

    /* NOTE(review): presumably re-enables accepting new connections - confirm */
    core_accept_conns(true);

    conn_cleanup(c);
    conn_put(c);

    stats_thread_decr(conn_curr);
}
static void client_unref_internal_try_put(struct conn *conn) { ASSERT(conn->waiting_to_unref); unsigned long msgs = dictSize(conn->outstanding_msgs_dict); if (msgs != 0) { log_warn("conn %p Waiting for %lu outstanding messages", conn, msgs); return; } struct server_pool *pool; ASSERT(conn->owner != NULL); pool = conn->owner; conn->owner = NULL; dictRelease(conn->outstanding_msgs_dict); conn->waiting_to_unref = 0; log_warn("unref conn %p owner %p from pool '%.*s'", conn, pool, pool->name.len, pool->name.data); conn_put(conn); }
/*
 * Error handling for a PDU.
 *
 * Nothing is done while the connection has no session (still in the login
 * phase). A PDU with a parent request has its response command released and
 * conn_put() called on the connection. A PDU without one is force-released,
 * but only if it is on the write timeout list.
 */
void isert_pdu_err(struct iscsi_cmnd *pdu)
{
	struct iscsi_conn *conn = pdu->conn;

	/* we are still in login phase */
	if (!conn->session)
		return;

	if (pdu->parent_req) {
		rsp_cmnd_release(pdu);
		conn_put(conn);
		return;
	}

	/*
	 * With multiple RDMAs the same PDU reports an error several times,
	 * so the forced release is tied to the timeout-list membership.
	 */
	if (pdu->on_write_timeout_list)
		req_cmnd_release_force(pdu);
}
/*
 * Send one message to `rank`.
 *
 * A fresh connection is opened to node_file[rank] (ip_eth / ocm_port), the
 * message is sent with conn_put() and the connection is closed.
 *
 * Returns 0 on success. On failure a non-zero value is returned: -1 if the
 * connect failed, the (decremented) negative send result if sending failed,
 * or the conn_close() result if closing failed. A send result of 0 means the
 * remote side closed and is treated as an error.
 *
 * The connection is now also closed when the send fails; it was previously
 * left open on that path.
 */
static int
send_msg(struct message *msg, int rank)
{
    int ret = -1;
    char port[HOST_NAME_MAX];
    struct sockconn conn;

    memset(&conn, 0, sizeof(conn));

    BUG(!msg);
    BUG(rank < 0); /* rank is used as an array index below */
    BUG(rank > node_file_entries - 1);

    snprintf(port, sizeof(port), "%d", node_file[rank].ocm_port);
    if (conn_connect(&conn, node_file[rank].ip_eth, port))
        goto out;

    ret = conn_put(&conn, msg, sizeof(*msg));
    if (--ret < 0) { /* 0 means remote closed; turn into error condition */
        /* keep the send error as the result; the close is best effort */
        (void)conn_close(&conn);
        goto out;
    }

    ret = conn_close(&conn);
    if (ret)
        goto out;

    ret = 0;

out:
    return ret;
}
void server_close(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *msg, *nmsg; /* current and next message */ struct conn *c_conn; /* peer client connection */ ASSERT(!conn->client && !conn->proxy); server_close_stats(ctx, conn->owner, conn->err, conn->eof, conn->connected); if (conn->sd < 0) { server_failure(ctx, conn->owner); conn->unref(conn); conn_put(conn); return; } for (msg = TAILQ_FIRST(&conn->imsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server inq */ conn->dequeue_inq(ctx, conn, msg); /* * Don't send any error response, if * 1. request is tagged as noreply or, * 2. client has already closed its connection */ if (msg->swallow || msg->noreply) { log_debug(LOG_INFO, "close s %d swallow req %"PRIu64" len %"PRIu32 " type %d", conn->sd, msg->id, msg->mlen, msg->type); req_put(msg); } else { c_conn = msg->owner; //ASSERT(c_conn->client && !c_conn->proxy); msg->done = 1; msg->error = 1; msg->err = conn->err; msg->dyn_error = STORAGE_CONNECTION_REFUSE; if (req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) { event_add_out(ctx->evb, msg->owner); } log_debug(LOG_INFO, "close s %d schedule error for req %"PRIu64" " "len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id, msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ', conn->err ? 
strerror(conn->err): " "); } } ASSERT(TAILQ_EMPTY(&conn->imsg_q)); for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server outq */ conn->dequeue_outq(ctx, conn, msg); if (msg->swallow) { log_debug(LOG_INFO, "close s %d swallow req %"PRIu64" len %"PRIu32 " type %d", conn->sd, msg->id, msg->mlen, msg->type); req_put(msg); } else { c_conn = msg->owner; //ASSERT(c_conn->client && !c_conn->proxy); msg->done = 1; msg->error = 1; msg->err = conn->err; if (req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) { event_add_out(ctx->evb, msg->owner); } log_debug(LOG_INFO, "close s %d schedule error for req %"PRIu64" " "len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id, msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ', conn->err ? strerror(conn->err): " "); } } ASSERT(TAILQ_EMPTY(&conn->omsg_q)); msg = conn->rmsg; if (msg != NULL) { conn->rmsg = NULL; ASSERT(!msg->request); ASSERT(msg->peer == NULL); rsp_put(msg); log_debug(LOG_INFO, "close s %d discarding rsp %"PRIu64" len %"PRIu32" " "in error", conn->sd, msg->id, msg->mlen); } ASSERT(conn->smsg == NULL); server_failure(ctx, conn->owner); conn->unref(conn); status = close(conn->sd); if (status < 0) { log_error("close s %d failed, ignored: %s", conn->sd, strerror(errno)); } conn->sd = -1; conn_put(conn); }
/*
 * Close `conn`: stop its close timer, close the underlying wx connection,
 * then pass the connection object to conn_put().
 */
void conn_close(struct conn_s* conn)
{
    /* the timer is stopped before the connection itself is closed */
    wx_timer_stop(&conn->closetimer);

    wx_conn_close(&conn->wx_conn);

    conn_put(conn);
}
/**
 * Process CM event.
 *
 * There is a listening rdmacm id per iface; this is called as a handler
 * from libev when the iface's CM channel has an event. One event is fetched,
 * dispatched on its type and acknowledged.
 *
 * The id context is either a conn_t pointer or, with the low bit set, the NI
 * of a loopback connection; for connection requests the conn is NULL.
 *
 * On the paths where a connection attempt is abandoned, the reference is
 * dropped with conn_put() only after conn->mutex has been released, so the
 * mutex is never unlocked on a connection whose reference was already given
 * up. The CONNECT_ERROR path already used this order.
 *
 * @param[in] w       the libev watcher; w->data is the struct iface
 * @param[in] revents not used
 */
void process_cm_event(EV_P_ ev_io *w, int revents)
{
	struct iface *iface = w->data;
	ni_t *ni;
	struct rdma_cm_event *event;
	conn_t *conn;
	struct rdma_conn_param conn_param;
	struct cm_priv_request priv;
	struct ibv_qp_init_attr init;
	uintptr_t ctx;
	int drop_ref;		/* conn_put() is owed once the mutex is released */

	if (rdma_get_cm_event(iface->cm_channel, &event)) {
		WARN();
		return;
	}

	/* In case of connection requests conn will be NULL. */
	ctx = (uintptr_t) event->id->context;
	if (ctx & 1) {
		/* Loopback. The context is not a conn but the NI. */
		ctx &= ~1;
		conn = NULL;
		ni = (void *)ctx;
	} else {
		conn = (void *)ctx;
		ni = conn ? conn->obj.obj_ni : NULL;
	}

	ptl_info("Rank got CM event %d for id %p\n", event->event, event->id);

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		if (!conn)
			break;

		pthread_mutex_lock(&conn->mutex);

		if (conn->state != CONN_STATE_RESOLVING_ADDR) {
			/* Our connect attempt got overridden by the remote
			 * side. */
			pthread_mutex_unlock(&conn->mutex);
			conn_put(conn);
			break;
		}

		assert(conn->rdma.cm_id == event->id);

		drop_ref = 0;
		conn->state = CONN_STATE_RESOLVING_ROUTE;
		if (rdma_resolve_route(event->id,
				       get_param(PTL_RDMA_TIMEOUT))) {
			conn->state = CONN_STATE_DISCONNECTED;
			pthread_cond_broadcast(&conn->move_wait);
			conn->rdma.cm_id = NULL;
			drop_ref = 1;
		}

		pthread_mutex_unlock(&conn->mutex);
		if (drop_ref)
			conn_put(conn);
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		if (!conn)
			break;

		memset(&conn_param, 0, sizeof conn_param);

		conn_param.responder_resources = 1;
		conn_param.initiator_depth = 1;
		conn_param.retry_count = 7;
		conn_param.rnr_retry_count = 7;
		conn_param.private_data = &priv;
		conn_param.private_data_len = sizeof(priv);

		pthread_mutex_lock(&conn->mutex);

		if (conn->state != CONN_STATE_RESOLVING_ROUTE) {
			/* Our connect attempt got overridden by the remote
			 * side. */
			pthread_mutex_unlock(&conn->mutex);
			conn_put(conn);
			break;
		}

		assert(conn->rdma.cm_id == event->id);

		/* Create the QP. */
		memset(&init, 0, sizeof(init));
		init.qp_context = ni;
		init.send_cq = ni->rdma.cq;
		init.recv_cq = ni->rdma.cq;
		init.cap.max_send_wr = ni->iface->cap.max_send_wr;
		init.cap.max_send_sge = ni->iface->cap.max_send_sge;
		init.qp_type = IBV_QPT_RC;
		init.srq = ni->rdma.srq;

		priv.src_id = ni->id;
		priv.options = ni->options;

		assert(conn->rdma.cm_id == event->id);

		drop_ref = 0;
		if (rdma_create_qp(event->id, ni->iface->pd, &init)) {
			WARN();
			conn->state = CONN_STATE_DISCONNECTED;
			pthread_cond_broadcast(&conn->move_wait);
			conn->rdma.cm_id = NULL;
			drop_ref = 1;
		} else if (rdma_connect(event->id, &conn_param)) {
			WARN();
			conn->state = CONN_STATE_DISCONNECTED;
			pthread_cond_broadcast(&conn->move_wait);
			rdma_destroy_qp(conn->rdma.cm_id);
			conn->rdma.cm_id = NULL;
			drop_ref = 1;
		} else {
			conn->state = CONN_STATE_CONNECTING;
		}

		pthread_mutex_unlock(&conn->mutex);
		if (drop_ref)
			conn_put(conn);
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		if (!conn) {
			/* Self connection. Let the initiator side finish the
			 * connection. */
			break;
		}

		pthread_mutex_lock(&conn->mutex);

		atomic_inc(&ni->rdma.num_conn);

		if (conn->state != CONN_STATE_CONNECTING) {
			pthread_mutex_unlock(&conn->mutex);
			break;
		}

		assert(conn->rdma.cm_id == event->id);

		get_qp_param(conn);

		conn->state = CONN_STATE_CONNECTED;
		pthread_cond_broadcast(&conn->move_wait);

		pthread_mutex_unlock(&conn->mutex);
		break;

	case RDMA_CM_EVENT_CONNECT_REQUEST:
		process_connect_request(iface, event);
		break;

	case RDMA_CM_EVENT_REJECTED:
		if (!conn)
			break;

		process_connect_reject(event, conn);
		break;

	case RDMA_CM_EVENT_DISCONNECTED:
		if (!conn) {
			/* That should be the loopback connection only. */
			assert(ni->rdma.self_cm_id == event->id);
			rdma_disconnect(ni->rdma.self_cm_id);
			rdma_destroy_qp(ni->rdma.self_cm_id);
			break;
		}

		pthread_mutex_lock(&conn->mutex);

		assert(conn->state != CONN_STATE_DISCONNECTED);

		if (conn->state != CONN_STATE_DISCONNECTING) {
			/* Not disconnecting yet, so we have to disconnect
			 * too. */
			rdma_disconnect(conn->rdma.cm_id);
			rdma_destroy_qp(conn->rdma.cm_id);
		}

		conn->state = CONN_STATE_DISCONNECTED;
		pthread_cond_broadcast(&conn->move_wait);

		atomic_dec(&ni->rdma.num_conn);

		pthread_mutex_unlock(&conn->mutex);
		break;

	case RDMA_CM_EVENT_CONNECT_ERROR:
		if (!conn)
			break;

		pthread_mutex_lock(&conn->mutex);

		if (conn->state != CONN_STATE_DISCONNECTED) {
			conn->state = CONN_STATE_DISCONNECTED;
			pthread_cond_broadcast(&conn->move_wait);
			conn->rdma.cm_id->context = NULL;
			rdma_destroy_qp(conn->rdma.cm_id);

			pthread_mutex_unlock(&conn->mutex);

			conn_put(conn);
		} else {
			pthread_mutex_unlock(&conn->mutex);
		}
		break;

	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		break;

	default:
		ptl_warn("Got unexpected CM event: %d\n", event->event);
		break;
	}

	rdma_ack_cm_event(event);
}
void client_close(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *msg, *nmsg; /* current and next message */ ASSERT(conn->client && !conn->proxy); client_close_stats(ctx, conn->owner, conn->err, conn->eof); if (conn->sd < 0) { conn->unref(conn); conn_put(conn); return; } msg = conn->rmsg; if (msg != NULL) { conn->rmsg = NULL; ASSERT(msg->peer == NULL); ASSERT(msg->request && !msg->done); log_debug(LOG_INFO, "close c %d discarding pending req %"PRIu64" len " "%"PRIu32" type %d", conn->sd, msg->id, msg->mlen, msg->type); req_put(msg); } ASSERT(conn->smsg == NULL); ASSERT(TAILQ_EMPTY(&conn->imsg_q)); for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, c_tqe); /* dequeue the message (request) from client outq */ conn->dequeue_outq(ctx, conn, msg); if (msg->done) { log_debug(LOG_INFO, "close c %d discarding %s req %"PRIu64" len " "%"PRIu32" type %d", conn->sd, msg->error ? "error": "completed", msg->id, msg->mlen, msg->type); req_put(msg); } else { //如果客户端请求已经转到后端取了还没有得到应答,这时候proxy和客户端关闭连接,则会走到这里 msg->swallow = 1; ASSERT(msg->request); ASSERT(msg->peer == NULL); log_debug(LOG_INFO, "close c %d schedule swallow of req %"PRIu64" " "len %"PRIu32" type %d", conn->sd, msg->id, msg->mlen, msg->type); } } ASSERT(TAILQ_EMPTY(&conn->omsg_q)); conn->unref(conn); status = close(conn->sd); if (status < 0) { log_error("close c %d failed, ignored: %s", conn->sd, strerror(errno)); } conn->sd = -1; conn_put(conn); }
/** * Process RC connection request event. * * @param[in] iface * @param[in] event * * @return status */ static void process_connect_request(struct iface *iface, struct rdma_cm_event *event) { const struct cm_priv_request *priv; struct cm_priv_reject rej; conn_t *conn; int ret = 0; int c; ni_t *ni; if (!event->param.conn.private_data || (event->param.conn.private_data_len < sizeof(struct cm_priv_request))) { rej.reason = REJECT_REASON_BAD_PARAM; goto reject; } priv = event->param.conn.private_data; ni = iface->ni[ni_options_to_type(priv->options)]; if (!ni) { rej.reason = REJECT_REASON_NO_NI; goto reject; } conn = get_conn(ni, priv->src_id); if (!conn) { WARN(); rej.reason = REJECT_REASON_ERROR; goto reject; } pthread_mutex_lock(&conn->mutex); switch (conn->state) { case CONN_STATE_CONNECTED: /* We received a connection request but we are already connected. Reject it. */ rej.reason = REJECT_REASON_CONNECTED; pthread_mutex_unlock(&conn->mutex); conn_put(conn); goto reject; break; case CONN_STATE_DISCONNECTED: /* we received a connection request and we are disconnected * - accept it */ ret = accept_connection_request(ni, conn, event); break; case CONN_STATE_DISCONNECTING: /* Not sure how to handle that case. Ignore and disconnect * anyway? 
*/ rej.reason = REJECT_REASON_DISCONNECTING; pthread_mutex_unlock(&conn->mutex); conn_put(conn); goto reject; break; case CONN_STATE_RESOLVING_ADDR: case CONN_STATE_RESOLVING_ROUTE: case CONN_STATE_CONNECTING: /* we received a connection request but we are already connecting * - accept connection from higher id * - reject connection from lower id * - accept connection from self, but cleanup */ c = compare_id(&priv->src_id, &ni->id); if (c > 0) ret = accept_connection_request(ni, conn, event); else if (c < 0) { rej.reason = REJECT_REASON_CONNECTING; pthread_mutex_unlock(&conn->mutex); conn_put(conn); goto reject; } else { ret = accept_connection_self(ni, conn, event); } break; } pthread_mutex_unlock(&conn->mutex); conn_put(conn); return; reject: rdma_reject(event->id, &rej, sizeof(rej)); return; }
/**
 * Get connection info for a given process id.
 *
 * For logical NIs the connection is contained in the rank table.
 * For physical NIs the connection is held in a binary tree using
 * the ID as a sorting value.
 *
 * For physical NIs, if no conn_t exists yet for this process a new one is
 * allocated and inserted into the tree. For logical NIs the conn_t is taken
 * from the rank table entry.
 *
 * The new conn's id is assigned before the local-node check, so the
 * shared-memory transport decision is based on the requested id rather than
 * on whatever the freshly allocated conn_t happened to contain.
 *
 * @param[in] ni the NI from which to get the connection
 * @param[in] id the process ID to lookup
 *
 * @return the conn_t with a reference taken on it, or NULL if the rank is
 *         out of range, allocation fails or the tree insert fails
 */
conn_t *get_conn(ni_t *ni, ptl_process_t id)
{
	conn_t *conn;
	void **ret;

	if (ni->options & PTL_NI_LOGICAL) {
		if (unlikely(id.rank >= ni->logical.map_size)) {
			ptl_warn("Invalid rank (%d >= %d)\n", id.rank,
				 ni->logical.map_size);
			return NULL;
		}

		conn = ni->logical.rank_table[id.rank].connect;
		conn_get(conn);
	} else {
		conn_t conn_search;

		PTL_FASTLOCK_LOCK(&ni->physical.lock);

		/* lookup in binary tree */
		conn_search.id = id;
		ret = tfind(&conn_search, &ni->physical.tree, compare_conn_id);
		if (ret) {
			conn = *ret;
			conn_get(conn);
		} else {
			/* Not found. Allocate and insert. */
			if (conn_alloc(ni, &conn)) {
				PTL_FASTLOCK_UNLOCK(&ni->physical.lock);
				WARN();
				return NULL;
			}

			/* Must be set before the nid comparison below. */
			conn->id = id;

#if IS_PPE || WITH_TRANSPORT_SHMEM
			/* need to connect local processes over shared memory */
			if (conn->id.phys.nid == ni->iface->id.phys.nid) {
				if (get_param(PTL_ENABLE_MEM)) {
#if IS_PPE
					conn->transport = transport_mem;
#elif WITH_TRANSPORT_SHMEM
					conn->transport = transport_shmem;
#endif
					conn->state = CONN_STATE_CONNECTED;
				}
			}
#endif

			/* Get the IP address from the NID. */
			conn->sin.sin_family = AF_INET;
			conn->sin.sin_addr.s_addr = nid_to_addr(id.phys.nid);
			conn->sin.sin_port = pid_to_port(id.phys.pid);

			/* insert new conn into binary tree */
			ret = tsearch(conn, &ni->physical.tree, compare_conn_id);
			if (!ret) {
				WARN();
				conn_put(conn);
				conn = NULL;
			} else {
				/* extra reference for the caller */
				conn_get(conn);
			}
		}

		PTL_FASTLOCK_UNLOCK(&ni->physical.lock);
	}

	return conn;
}
static int batch_deliver(struct cuda_rpc *rpc, struct cuda_packet *return_pkt) { int exit_errno; struct cuda_pkt_batch *batch = &rpc->batch; size_t payload_len = 0UL; struct flush *f; struct timer t; printd(DBG_INFO, "pkts = %lu size = %lu\n", batch->header.num_pkts, batch->header.bytes_used); timer_init(CLOCK_REALTIME, &t); f = &flushes[num_flushes]; clock_gettime(CLOCK_REALTIME, &f->ts); f->bytes = sizeof(batch->header); f->blocking = false; FAIL_ON_CONN_ERR( conn_put(&rpc->sockconn, &batch->header, sizeof(batch->header)) ); #if defined(NIC_SDP) f = &flushes[++num_flushes]; clock_gettime(CLOCK_REALTIME, &f->ts); f->bytes = batch->header.bytes_used + ZCPY_TRIGGER_SZ; timer_start(&t); // ignored if batch is non-blocking FAIL_ON_CONN_ERR( conn_put(&rpc->sockconn, batch->buffer, batch->header.bytes_used + ZCPY_TRIGGER_SZ) ); #else FAIL_ON_CONN_ERR( conn_put(&rpc->sockconn, batch->buffer, batch->header.bytes_used) ); #endif #ifndef NO_PIPELINING if (last_pkt(rpc)->is_sync) { /* only expect a return packet if last is sync */ #endif #if defined(NIC_SDP) f->blocking = true; f->bytes += sizeof(*return_pkt) + ZCPY_TRIGGER_SZ; FAIL_ON_CONN_ERR( conn_get(&rpc->sockconn, return_pkt, sizeof(*return_pkt) + ZCPY_TRIGGER_SZ) ); #else FAIL_ON_CONN_ERR( conn_get(&rpc->sockconn, return_pkt, sizeof(*return_pkt)) ); #endif payload_len = return_pkt->len - sizeof(*return_pkt); if (payload_len > 0) { #if defined(NIC_SDP) f->bytes += payload_len + ZCPY_TRIGGER_SZ; f->has_ret_payload = true; FAIL_ON_CONN_ERR( conn_get(&rpc->sockconn, (return_pkt + 1), payload_len + ZCPY_TRIGGER_SZ) ); #else FAIL_ON_CONN_ERR( conn_get(&rpc->sockconn, (return_pkt + 1), payload_len) ); #endif } #ifndef NO_PIPELINING f->lat = timer_end(&t, MICROSECONDS); f->exec = return_pkt->execlat; } #endif ++num_flushes; batch_clear(batch); return 0; fail: return exit_errno; }
/* <-- process requests from other daemons */ static void * inbound_thread(void *arg) { struct sockconn *conn = (struct sockconn*)arg; struct message msg; int ret = 0; BUG(!conn); printd("spawned\n"); while (true) { ret = conn_get(conn, &msg, sizeof(msg)); if (ret < 1) break; printd("got msg %s\n", MSG_TYPE2STR(msg.type)); if (msg.type == MSG_ADD_NODE) { alloc_add_node(msg.rank, &msg.u.node.config); } else if (msg.type == MSG_REQ_ALLOC) { //Currently only rank 0 can handle inital allocation request //messages to determine the rank of the node that will fulfill //the allocation BUG(myrank != 0); msg_recv_req_alloc(&msg); ret = conn_put(conn, &msg, sizeof(msg)); if (--ret < 0) break; } else if (msg.type == MSG_DO_ALLOC) { //As remote allocations are created, assign them an identifying ID printd("Remote allocation has local ID of %lu\n", rem_alloc_id); msg.u.alloc.rem_alloc_id = rem_alloc_id; //Increment the ID for each allocation rem_alloc_id++; #ifdef INFINIBAND /* First, send msg back to orig rank to unblock app, so it can * initiate connection to us. Then listen for connections. * XXX possible race condition */ msg.u.alloc.u.rdma.port = ib_port; ib_port += 1; ret = conn_put(conn, &msg, sizeof(msg)); if (--ret < 0) break; msg_recv_do_alloc(&msg); /* blocks */ #endif #ifdef EXTOLL /* EXTOLL server allocations are nonblocking and the call to * alloc_ate should return the needed setup parameters for the client * in msg. 
*/ msg_recv_do_alloc(&msg); /* should not block for EXTOLL setup */ ret = conn_put(conn, &msg, sizeof(msg)); #endif } else if (msg.type == MSG_DO_FREE) { printd("InboundThread received free request for allocation \n"); //Free the remote allocation msg_recv_do_free(&msg); ret = conn_put(conn, &msg, sizeof(msg)); } else if (msg.type == MSG_REQ_FREE) { //TODO - should only be received at root node and releases data structures //that hold information about this allocation ret = 0; ret = conn_put(conn, &msg, sizeof(msg)); } else { printd("unhandled message %s\n", MSG_TYPE2STR(msg.type)); BUG(1); } } printd("exiting %s\n", (ret < 0 ? "with error" : "normally")); if (ret) BUG(1); return NULL; }
void server_close(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *msg, *nmsg; /* current and next message */ ASSERT(conn->type == CONN_SERVER); server_close_stats(ctx, conn->owner, conn->err, conn->eof, conn->connected); if (conn->sd < 0) { server_failure(ctx, conn->owner); conn_unref(conn); conn_put(conn); return; } for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server outq */ conn_dequeue_outq(ctx, conn, msg); server_ack_err(ctx, conn, msg); } ASSERT(TAILQ_EMPTY(&conn->omsg_q)); for (msg = TAILQ_FIRST(&conn->imsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server inq */ conn_dequeue_inq(ctx, conn, msg); // We should also remove the msg from the timeout rbtree. msg_tmo_delete(msg); server_ack_err(ctx, conn, msg); stats_server_incr(ctx, conn->owner, server_dropped_requests); } ASSERT(TAILQ_EMPTY(&conn->imsg_q)); msg = conn->rmsg; if (msg != NULL) { conn->rmsg = NULL; ASSERT(!msg->request); ASSERT(msg->peer == NULL); rsp_put(msg); log_debug(LOG_INFO, "close s %d discarding rsp %"PRIu64" len %"PRIu32" " "in error", conn->sd, msg->id, msg->mlen); } ASSERT(conn->smsg == NULL); server_failure(ctx, conn->owner); conn_unref(conn); status = close(conn->sd); if (status < 0) { log_error("close s %d failed, ignored: %s", conn->sd, strerror(errno)); } conn->sd = -1; conn_put(conn); }
static void dnode_client_close(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *msg, *nmsg; /* current and next message */ ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); dnode_client_close_stats(ctx, conn->owner, conn->err, conn->eof); if (conn->sd < 0) { conn_unref(conn); conn_put(conn); return; } msg = conn->rmsg; if (msg != NULL) { conn->rmsg = NULL; ASSERT(msg->peer == NULL); ASSERT(msg->request && !msg->done); if (log_loggable(LOG_INFO)) { log_debug(LOG_INFO, "dyn: close c %d discarding pending req %"PRIu64" len " "%"PRIu32" type %d", conn->sd, msg->id, msg->mlen, msg->type); } dictDelete(conn->outstanding_msgs_dict, &msg->id); req_put(msg); } ASSERT(conn->smsg == NULL); ASSERT(TAILQ_EMPTY(&conn->imsg_q)); for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, c_tqe); /* dequeue the message (request) from client outq */ conn_dequeue_outq(ctx, conn, msg); if (msg->done) { if (log_loggable(LOG_INFO)) { log_debug(LOG_INFO, "dyn: close c %d discarding %s req %"PRIu64" len " "%"PRIu32" type %d", conn->sd, msg->error ? "error": "completed", msg->id, msg->mlen, msg->type); } dictDelete(conn->outstanding_msgs_dict, &msg->id); req_put(msg); } else { msg->swallow = 1; ASSERT(msg->request); ASSERT(msg->peer == NULL); if (log_loggable(LOG_INFO)) { log_debug(LOG_INFO, "dyn: close c %d schedule swallow of req %"PRIu64" " "len %"PRIu32" type %d", conn->sd, msg->id, msg->mlen, msg->type); } } } ASSERT(TAILQ_EMPTY(&conn->omsg_q)); conn_unref(conn); status = close(conn->sd); if (status < 0) { log_error("dyn: close c %d failed, ignored: %s", conn->sd, strerror(errno)); } conn->sd = -1; conn_put(conn); }