/*
 * This random exponential backoff is relied on to eventually resolve racing
 * connects.
 *
 * If connect attempts race then both parties drop both connections and come
 * here to wait for a random amount of time before trying again. Eventually
 * the backoff range will be so much greater than the time it takes to
 * establish a connection that one of the pair will establish the connection
 * before the other's random delay fires.
 *
 * Connection attempts that arrive while a connection is already established
 * are also considered to be racing connects. This lets a connection from
 * a rebooted machine replace an existing stale connection before the transport
 * notices that the connection has failed.
 *
 * We should *always* start with a random backoff; otherwise a broken
 * connection will always take several iterations to be re-established.
 */
void rds_queue_reconnect(struct rds_conn_path *cp)
{
	unsigned long rand;
	struct rds_connection *conn = cp->cp_conn;

	rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
		 conn, &conn->c_laddr, &conn->c_faddr,
		 cp->cp_reconnect_jiffies);

	/* let peer with smaller addr initiate reconnect, to avoid duels */
	if (conn->c_trans->t_type == RDS_TRANS_TCP &&
	    conn->c_laddr > conn->c_faddr)
		return;

	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
	if (cp->cp_reconnect_jiffies == 0) {
		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
		return;
	}

	get_random_bytes(&rand, sizeof(rand));
	rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
		 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
		 conn, &conn->c_laddr, &conn->c_faddr);

	queue_delayed_work(rds_wq, &cp->cp_conn_w,
			   rand % cp->cp_reconnect_jiffies);

	cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
				       rds_sysctl_reconnect_max_jiffies);
}
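/*
 * Illustration only, not RDS code: a minimal user-space sketch of the
 * backoff policy above. The delay is drawn uniformly from [0, window)
 * and the window doubles per attempt up to a cap; the min/max values
 * below are made-up stand-ins for the reconnect sysctls.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long window = 2;	/* stand-in for reconnect_min_jiffies */
	const unsigned long cap = 64;	/* stand-in for reconnect_max_jiffies */
	int attempt;

	for (attempt = 0; attempt < 8; attempt++) {
		unsigned long delay = (unsigned long)rand() % window;

		printf("attempt %d: delay %lu (window %lu)\n",
		       attempt, delay, window);
		window = window * 2 < cap ? window * 2 : cap;
	}
	return 0;
}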
static void rds_queue_reconnect(struct rds_connection *conn)
{
	unsigned long rand;

	rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
		 conn, &conn->c_laddr, &conn->c_faddr,
		 conn->c_reconnect_jiffies);

	set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
	if (conn->c_reconnect_jiffies == 0) {
		conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
		queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
		return;
	}

	get_random_bytes(&rand, sizeof(rand));
	rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
		 rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies,
		 conn, &conn->c_laddr, &conn->c_faddr);

	queue_delayed_work(rds_wq, &conn->c_conn_w,
			   rand % conn->c_reconnect_jiffies);

	conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2,
					rds_sysctl_reconnect_max_jiffies);
}
int rds_tcp_conn_connect(struct rds_connection *conn)
{
	struct socket *sock = NULL;
	struct sockaddr_in src, dest;
	int ret;
	struct rds_tcp_connection *tc = conn->c_transport_data;

	mutex_lock(&tc->t_conn_lock);

	if (rds_conn_up(conn)) {
		mutex_unlock(&tc->t_conn_lock);
		return 0;
	}
	ret = sock_create_kern(rds_conn_net(conn), PF_INET,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (ret < 0)
		goto out;

	rds_tcp_tune(sock);

	src.sin_family = AF_INET;
	src.sin_addr.s_addr = (__force u32)conn->c_laddr;
	src.sin_port = (__force u16)htons(0);

	ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src));
	if (ret) {
		rdsdebug("bind failed with %d at address %pI4\n",
			 ret, &conn->c_laddr);
		goto out;
	}

	dest.sin_family = AF_INET;
	dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
	dest.sin_port = (__force u16)htons(RDS_TCP_PORT);

	/*
	 * once we call connect() we can start getting callbacks and they
	 * own the socket
	 */
	rds_tcp_set_callbacks(sock, conn);
	ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
				 O_NONBLOCK);

	rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
	if (ret == -EINPROGRESS)
		ret = 0;
	if (ret == 0) {
		rds_tcp_keepalive(sock);
		sock = NULL;
	} else {
		rds_tcp_restore_callbacks(sock, conn->c_transport_data);
	}

out:
	mutex_unlock(&tc->t_conn_lock);
	if (sock)
		sock_release(sock);
	return ret;
}
static void rds_recv_forward(struct rds_connection *conn,
			     struct rds_incoming *inc, gfp_t gfp)
{
	int len, ret;
	struct rds_nf_hdr *dst, *org;
	struct rds_sock *rs;

	/* initialize some bits */
	rs = NULL;

	/* pull out the destination and original rds headers */
	dst = rds_nf_hdr_dst(inc->i_skb);
	org = rds_nf_hdr_org(inc->i_skb);

	/* find the proper output socket - it should be the local one on
	 * which we originated */
	rs = rds_find_bound(dst->saddr, dst->sport);
	if (!rs) {
		rdsdebug("failed to find output rds_socket dst %u.%u.%u.%u : %u, "
			 "inc %p, conn %p\n",
			 NIPQUAD(dst->daddr), dst->dport, inc, conn);
		rds_stats_inc(s_recv_drop_no_sock);
		goto out;
	}

	/* pull out the actual message len */
	len = be32_to_cpu(inc->i_hdr.h_len);

	/* now let's see if we can send it all */
	ret = rds_send_internal(conn, rs, inc->i_skb, gfp);
	if (len != ret) {
		rdsdebug("failed to send rds_data dst %u.%u.%u.%u : %u, "
			 "inc %p, conn %p, len %d != ret %d\n",
			 NIPQUAD(dst->daddr), dst->dport, inc, conn, len, ret);
		goto out;
	}

	rds_sock_put(rs);

	/* all good so we are done */
	return;

out:
	/* cleanup any handles */
	if (NULL != rs)
		rds_sock_put(rs);

	/* on error lets take a shot at hook cleanup */
	NF_HOOK(PF_RDS_HOOK, NF_RDS_FORWARD_ERROR, inc->i_skb, NULL, NULL,
		rds_recv_ok);

	/* then hand the request off to normal local processing on the
	 * old connection */
	rds_recv_local(inc->i_oconn, org->saddr, org->daddr, inc, gfp);
}
void rds_send_worker(struct work_struct *work)
{
	struct rds_conn_path *cp = container_of(work,
						struct rds_conn_path,
						cp_send_w.work);
	int ret;

	if (rds_conn_path_state(cp) == RDS_CONN_UP) {
		clear_bit(RDS_LL_SEND_FULL, &cp->cp_flags);
		ret = rds_send_xmit(cp);
		cond_resched();
		rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
		switch (ret) {
		case -EAGAIN:
			rds_stats_inc(s_send_immediate_retry);
			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
			break;
		case -ENOMEM:
			rds_stats_inc(s_send_delayed_retry);
			queue_delayed_work(rds_wq, &cp->cp_send_w, 2);
			/* fall through */
		default:
			break;
		}
	}
}
void rds_connect_worker(struct work_struct *work)
{
	struct rds_conn_path *cp = container_of(work,
						struct rds_conn_path,
						cp_conn_w.work);
	struct rds_connection *conn = cp->cp_conn;
	int ret;

	if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
		return;
	clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
	ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
	if (ret) {
		ret = conn->c_trans->conn_path_connect(cp);
		rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
			 conn, &conn->c_laddr, &conn->c_faddr, ret);

		if (ret) {
			if (rds_conn_path_transition(cp,
						     RDS_CONN_CONNECTING,
						     RDS_CONN_DOWN))
				rds_queue_reconnect(cp);
			else
				rds_conn_path_error(cp, "connect failed\n");
		}
	}
}
void rds_tcp_listen_data_ready(struct sock *sk)
{
	void (*ready)(struct sock *sk);

	rdsdebug("listen data ready sk %p\n", sk);

	read_lock(&sk->sk_callback_lock);
	ready = sk->sk_user_data;
	if (!ready) { /* check for teardown race */
		ready = sk->sk_data_ready;
		goto out;
	}

	/*
	 * ->sk_data_ready is also called for a newly established child socket
	 * before it has been accepted and the accepter has set up their
	 * data_ready.. we only want to queue listen work for our listening
	 * socket
	 */
	if (sk->sk_state == TCP_LISTEN)
		rds_tcp_accept_work(sk);

out:
	read_unlock(&sk->sk_callback_lock);
	ready(sk);
}
void rds_connect_complete(struct rds_connection *conn)
{
	if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
		printk(KERN_WARNING "%s: Cannot transition to state UP, "
		       "current state is %d\n", __func__,
		       atomic_read(&conn->c_state));
		atomic_set(&conn->c_state, RDS_CONN_ERROR);
		queue_work(rds_wq, &conn->c_down_w);
		return;
	}

	rdsdebug("conn %p for %pI4 to %pI4 complete\n",
		 conn, &conn->c_laddr, &conn->c_faddr);

	conn->c_reconnect_jiffies = 0;
	set_bit(0, &conn->c_map_queued);
	queue_delayed_work(rds_wq, &conn->c_send_w, 0);
	queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
	queue_delayed_work(rds_wq, &conn->c_hb_w, 0);
	conn->c_hb_start = 0;

	conn->c_connection_start = get_seconds();
	conn->c_reconnect = 1;
	conn->c_committed_version = conn->c_version;
	conn->c_proposed_version = RDS_PROTOCOL_VERSION;
}
void rds_hb_worker(struct work_struct *work)
{
	struct rds_connection *conn = container_of(work,
						   struct rds_connection,
						   c_hb_w.work);
	unsigned long now = get_seconds();
	int ret;

	if (!rds_conn_hb_timeout || conn->c_loopback)
		return;

	if (rds_conn_state(conn) == RDS_CONN_UP) {
		if (!conn->c_hb_start) {
			ret = rds_send_hb(conn, 0);
			if (ret) {
				rdsdebug("RDS/IB: rds_hb_worker: failed %d\n",
					 ret);
				return;
			}
			conn->c_hb_start = now;
		} else if (now - conn->c_hb_start > rds_conn_hb_timeout) {
			printk(KERN_NOTICE
			       "RDS/IB: connection <%u.%u.%u.%u,%u.%u.%u.%u,%d> "
			       "timed out (0x%lx,0x%lx)..disconnecting and "
			       "reconnecting\n",
			       NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
			       conn->c_tos, conn->c_hb_start, now);
			rds_conn_drop(conn);
			return;
		}
		queue_delayed_work(rds_wq, &conn->c_hb_w, HZ);
	}
}
/*
 * Before killing the tcp socket this needs to serialize with callbacks. The
 * caller has already grabbed the sending sem so we're serialized with other
 * senders.
 *
 * TCP calls the callbacks with the sock lock so we hold it while we reset the
 * callbacks to those set by TCP. Our callbacks won't execute again once we
 * hold the sock lock.
 */
void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
{
	struct rds_tcp_connection *tc = cp->cp_transport_data;
	struct socket *sock = tc->t_sock;

	rdsdebug("shutting down conn %p tc %p sock %p\n",
		 cp->cp_conn, tc, sock);

	if (sock) {
		if (rds_destroy_pending(cp->cp_conn))
			rds_tcp_set_linger(sock);
		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
		lock_sock(sock->sk);
		rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */
		release_sock(sock->sk);
		sock_release(sock);
	}

	if (tc->t_tinc) {
		rds_inc_put(&tc->t_tinc->ti_inc);
		tc->t_tinc = NULL;
	}
	tc->t_tinc_hdr_rem = sizeof(struct rds_header);
	tc->t_tinc_data_rem = 0;
}
/*
 * get_user_pages() called flush_dcache_page() on the pages for us.
 */
void rds_info_copy(struct rds_info_iterator *iter, void *data,
		   unsigned long bytes)
{
	unsigned long this;

	while (bytes) {
		if (iter->addr == NULL)
			iter->addr = kmap_atomic(*iter->pages, KM_USER0);

		this = min(bytes, PAGE_SIZE - iter->offset);

		rdsdebug("page %p addr %p offset %lu this %lu data %p "
			 "bytes %lu\n", *iter->pages, iter->addr,
			 iter->offset, this, data, bytes);

		memcpy(iter->addr + iter->offset, data, this);

		data += this;
		bytes -= this;
		iter->offset += this;

		if (iter->offset == PAGE_SIZE) {
			kunmap_atomic(iter->addr, KM_USER0);
			iter->addr = NULL;
			iter->offset = 0;
			iter->pages++;
		}
	}
}
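/*
 * Illustration only, not kernel code: a user-space sketch of the page
 * chunking used by rds_info_copy() above. Each copy is bounded by the
 * bytes remaining and the room left in the current page, and the
 * iterator moves to the next page once the offset reaches the page
 * size. The 8-byte "page" is an arbitrary size chosen for brevity.
 */
#include <stdio.h>
#include <string.h>

#define SKETCH_PAGE_SIZE 8

int main(void)
{
	char pages[3][SKETCH_PAGE_SIZE];
	const char *data = "abcdefghijklmnopqrst";
	size_t bytes = strlen(data);
	size_t offset = 0;
	int page = 0;

	while (bytes) {
		size_t this = bytes < SKETCH_PAGE_SIZE - offset ?
			      bytes : SKETCH_PAGE_SIZE - offset;

		memcpy(&pages[page][offset], data, this);
		data += this;
		bytes -= this;
		offset += this;
		if (offset == SKETCH_PAGE_SIZE) {
			offset = 0;
			page++;
		}
	}
	printf("filled %d full pages, %zu bytes in the last\n", page, offset);
	return 0;
}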
void rds_connect_worker(struct work_struct *work)
{
	struct rds_connection *conn = container_of(work,
						   struct rds_connection,
						   c_conn_w.work);
	int ret;

	clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
	if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
		/*
		 * record the time we started trying to connect so that we can
		 * drop the connection if it doesn't work out after a while
		 */
		conn->c_connection_start = get_seconds();

		ret = conn->c_trans->conn_connect(conn);
		rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
			 conn, &conn->c_laddr, &conn->c_faddr, ret);

		if (ret) {
			if (rds_conn_transition(conn, RDS_CONN_CONNECTING,
						RDS_CONN_DOWN))
				rds_queue_reconnect(conn);
			else
				rds_conn_error(conn, "RDS: connect failed\n");
		}
	}
}
void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
{
	void (*ready)(struct sock *sk, int bytes);

	rdsdebug("listen data ready sk %p\n", sk);

	read_lock_bh(&sk->sk_callback_lock);
	ready = sk->sk_user_data;
	if (!ready) { /* check for teardown race */
		ready = sk->sk_data_ready;
		goto out;
	}

	/*
	 * ->sk_data_ready is also called for a newly established child socket
	 * before it has been accepted and the accepter has set up their
	 * data_ready.. we only want to queue listen work for our listening
	 * socket
	 */
	if (sk->sk_state == TCP_LISTEN)
		queue_work(rds_wq, &rds_tcp_listen_work);

out:
	read_unlock_bh(&sk->sk_callback_lock);
	ready(sk, bytes);
}
static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
				  struct rds_cong_map *map,
				  int delta, __be16 port)
{
	int now_congested;

	if (delta == 0)
		return;

	rs->rs_rcv_bytes += delta;
	now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);

	rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
		 "now_cong %d delta %d\n",
		 rs, &rs->rs_bound_addr,
		 ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
		 rds_sk_rcvbuf(rs), now_congested, delta);

	/* wasn't -> am congested */
	if (!rs->rs_congested && now_congested) {
		rs->rs_congested = 1;
		rds_cong_set_bit(map, port);
		rds_cong_queue_updates(map);
	}
	/* was -> aren't congested */
	/* Require more free space before reporting uncongested to prevent
	   bouncing cong/uncong state too often */
	else if (rs->rs_congested &&
		 (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs) / 2))) {
		rs->rs_congested = 0;
		rds_cong_clear_bit(map, port);
		rds_cong_queue_updates(map);
	}

	/* do nothing if no change in cong state */
}
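/*
 * Illustration only, not kernel code: the congest/uncongest hysteresis
 * above in isolation. Congestion is flagged when queued bytes exceed
 * the receive buffer but cleared only once they drop below half of it,
 * so small deltas near the limit cannot flap the state. The buffer
 * size and deltas are arbitrary example values.
 */
#include <stdio.h>

int main(void)
{
	const int rcvbuf = 100;
	const int deltas[] = { 60, 50, -20, -30, -15 };
	int bytes = 0, congested = 0;
	unsigned int i;

	for (i = 0; i < sizeof(deltas) / sizeof(deltas[0]); i++) {
		bytes += deltas[i];
		if (!congested && bytes > rcvbuf)
			congested = 1;	/* wasn't -> am congested */
		else if (congested && bytes < rcvbuf / 2)
			congested = 0;	/* was -> aren't congested */
		printf("bytes %3d congested %d\n", bytes, congested);
	}
	return 0;
}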
void rds_tcp_state_change(struct sock *sk)
{
	void (*state_change)(struct sock *sk);
	struct rds_conn_path *cp;
	struct rds_tcp_connection *tc;

	read_lock_bh(&sk->sk_callback_lock);
	cp = sk->sk_user_data;
	if (!cp) {
		state_change = sk->sk_state_change;
		goto out;
	}
	tc = cp->cp_transport_data;
	state_change = tc->t_orig_state_change;

	rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state);

	switch (sk->sk_state) {
	/* ignore connecting sockets as they make progress */
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		break;
	case TCP_ESTABLISHED:
		rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
		break;
	case TCP_CLOSE_WAIT:
	case TCP_CLOSE:
		rds_conn_path_drop(cp);
		/* fall through */
	default:
		break;
	}
out:
	read_unlock_bh(&sk->sk_callback_lock);
	state_change(sk);
}
int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
			       size_t total_len)
{
	unsigned long to_copy;
	unsigned long iov_off;
	unsigned long sg_off;
	struct iovec *iov;
	struct scatterlist *sg;
	int ret = 0;

	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);

	/*
	 * now allocate and copy in the data payload.
	 */
	sg = rm->data.op_sg;
	iov = first_iov;
	iov_off = 0;
	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */

	while (total_len) {
		if (!sg_page(sg)) {
			ret = rds_page_remainder_alloc(sg, total_len,
						       GFP_HIGHUSER);
			if (ret)
				goto out;
			rm->data.op_nents++;
			sg_off = 0;
		}

		while (iov_off == iov->iov_len) {
			iov_off = 0;
			iov++;
		}

		to_copy = min(iov->iov_len - iov_off, sg->length - sg_off);
		to_copy = min_t(size_t, to_copy, total_len);

		rdsdebug("copying %lu bytes from user iov [%p, %zu] + %lu to "
			 "sg [%p, %u, %u] + %lu\n",
			 to_copy, iov->iov_base, iov->iov_len, iov_off,
			 (void *)sg_page(sg), sg->offset, sg->length, sg_off);

		ret = rds_page_copy_from_user(sg_page(sg), sg->offset + sg_off,
					      iov->iov_base + iov_off,
					      to_copy);
		if (ret)
			goto out;

		iov_off += to_copy;
		total_len -= to_copy;
		sg_off += to_copy;

		if (sg_off == sg->length)
			sg++;
	}

out:
	return ret;
}
void rds_tcp_state_change(struct sock *sk)
{
	void (*state_change)(struct sock *sk);
	struct rds_connection *conn;
	struct rds_tcp_connection *tc;

	read_lock(&sk->sk_callback_lock);
	conn = sk->sk_user_data;
	if (conn == NULL) {
		state_change = sk->sk_state_change;
		goto out;
	}
	tc = conn->c_transport_data;
	state_change = tc->t_orig_state_change;

	rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state);

	switch (sk->sk_state) {
	/* ignore connecting sockets as they make progress */
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		break;
	case TCP_ESTABLISHED:
		rds_connect_complete(conn);
		break;
	case TCP_CLOSE:
		rds_conn_drop(conn);
		/* fall through */
	default:
		break;
	}
out:
	read_unlock(&sk->sk_callback_lock);
	state_change(sk);
}
static void rds_recv_drop(struct rds_connection *conn, __be32 saddr,
			  __be32 daddr, struct rds_incoming *inc, gfp_t gfp)
{
	/* drop the existing incoming message */
	rdsdebug("dropping request on conn %p, inc %p, "
		 "%u.%u.%u.%u -> %u.%u.%u.%u\n",
		 conn, inc, NIPQUAD(saddr), NIPQUAD(daddr));
}
static void rds_rdma_listen_stop(void)
{
	if (rds_rdma_listen_id) {
		rdsdebug("cm %p\n", rds_rdma_listen_id);
		rdma_destroy_id(rds_rdma_listen_id);
		rds_rdma_listen_id = NULL;
	}
}
void rds_inc_put(struct rds_incoming *inc)
{
	rdsdebug("put inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
	if (atomic_dec_and_test(&inc->i_refcount)) {
		BUG_ON(!list_empty(&inc->i_item));

		inc->i_conn->c_trans->inc_free(inc);
	}
}
int rds_message_inc_copy_to_user(struct rds_incoming *inc,
				 struct iovec *first_iov, size_t size)
{
	struct rds_message *rm;
	struct iovec *iov;
	struct scatterlist *sg;
	unsigned long to_copy;
	unsigned long iov_off;
	unsigned long vec_off;
	int copied;
	int ret;
	u32 len;

	rm = container_of(inc, struct rds_message, m_inc);
	len = be32_to_cpu(rm->m_inc.i_hdr.h_len);

	iov = first_iov;
	iov_off = 0;
	sg = rm->data.op_sg;
	vec_off = 0;
	copied = 0;

	while (copied < size && copied < len) {
		while (iov_off == iov->iov_len) {
			iov_off = 0;
			iov++;
		}

		to_copy = min(iov->iov_len - iov_off, sg->length - vec_off);
		to_copy = min_t(size_t, to_copy, size - copied);
		to_copy = min_t(unsigned long, to_copy, len - copied);

		rdsdebug("copying %lu bytes to user iov [%p, %zu] + %lu to "
			 "sg [%p, %u, %u] + %lu\n",
			 to_copy, iov->iov_base, iov->iov_len, iov_off,
			 sg_page(sg), sg->offset, sg->length, vec_off);

		ret = rds_page_copy_to_user(sg_page(sg), sg->offset + vec_off,
					    iov->iov_base + iov_off, to_copy);
		if (ret) {
			copied = ret;
			break;
		}

		iov_off += to_copy;
		vec_off += to_copy;
		copied += to_copy;

		if (vec_off == sg->length) {
			vec_off = 0;
			sg++;
		}
	}

	return copied;
}
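/*
 * Illustration only, not kernel code: the two-cursor copy pattern used
 * by the copy_to/from_user loops above. Each step copies the minimum
 * of what remains in the current source segment and the current
 * destination segment, then advances whichever cursor hit a segment
 * boundary. Segment lengths here are arbitrary example values.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char src[] = "abcdefghijkl";		/* 12 payload bytes */
	char dst[12];
	const size_t src_seg[] = { 5, 7 };		/* "iov" lengths */
	const size_t dst_seg[] = { 4, 4, 4 };		/* "sg" lengths */
	size_t s = 0, d = 0, s_off = 0, d_off = 0, copied = 0;

	while (copied < sizeof(dst)) {
		size_t left_s = src_seg[s] - s_off;
		size_t left_d = dst_seg[d] - d_off;
		size_t this = left_s < left_d ? left_s : left_d;

		memcpy(dst + copied, src + copied, this);
		printf("copied %zu bytes (src seg %zu, dst seg %zu)\n",
		       this, s, d);
		copied += this;
		s_off += this;
		d_off += this;
		if (s_off == src_seg[s]) {
			s++;
			s_off = 0;
		}
		if (d_off == dst_seg[d]) {
			d++;
			d_off = 0;
		}
	}
	return 0;
}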
static int rds_tcp_accept_one(struct socket *sock)
{
	struct socket *new_sock = NULL;
	struct rds_connection *conn;
	int ret;
	struct inet_sock *inet;

	ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
			       sock->sk->sk_protocol, &new_sock);
	if (ret)
		goto out;

	new_sock->type = sock->type;
	new_sock->ops = sock->ops;
	ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
	if (ret < 0)
		goto out;

	rds_tcp_tune(new_sock);

	inet = inet_sk(new_sock->sk);
	rdsdebug("accepted tcp %pI4:%u -> %pI4:%u\n",
		 &inet->inet_saddr, ntohs(inet->inet_sport),
		 &inet->inet_daddr, ntohs(inet->inet_dport));

	conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr,
			       &rds_tcp_transport, GFP_KERNEL);
	if (IS_ERR(conn)) {
		ret = PTR_ERR(conn);
		goto out;
	}

	/*
	 * see the comment above rds_queue_reconnect()
	 */
	if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
		if (rds_conn_state(conn) == RDS_CONN_UP)
			rds_tcp_stats_inc(s_tcp_listen_closed_stale);
		else
			rds_tcp_stats_inc(s_tcp_connect_raced);
		rds_conn_drop(conn);
		ret = 0;
		goto out;
	}

	rds_tcp_set_callbacks(new_sock, conn);
	rds_connect_complete(conn);
	new_sock = NULL;
	ret = 0;

out:
	if (new_sock)
		sock_release(new_sock);
	return ret;
}
static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
{
	struct rds_connection *conn = data;
	struct rds_ib_connection *ic = conn->c_transport_data;

	rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);

	switch (event->event) {
	case IB_EVENT_COMM_EST:
		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
		break;
	default:
		rdsdebug("Fatal QP Event %u - connection %pI4->%pI4, "
			 "reconnecting\n",
			 event->event, &conn->c_laddr, &conn->c_faddr);
		rds_conn_drop(conn);
		break;
	}
}
static void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_ib_connection *ic = conn->c_transport_data;

	rdsdebug("conn %p cq %p\n", conn, cq);

	rds_ib_stats_inc(s_ib_evt_handler_call);

	tasklet_schedule(&ic->i_send_tasklet);
}
void rds_message_put(struct rds_message *rm)
{
	rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
	WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm);
	if (atomic_dec_and_test(&rm->m_refcount)) {
		BUG_ON(!list_empty(&rm->m_sock_item));
		BUG_ON(!list_empty(&rm->m_conn_item));
		rds_message_purge(rm);

		kfree(rm);
	}
}
static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
				 struct rds_ib_send_work *send,
				 int wc_status)
{
	struct rds_message *rm = send->s_rm;

	rdsdebug("ic %p send %p rm %p\n", ic, send, rm);

	ib_dma_unmap_sg(ic->i_cm_id->device,
			rm->m_sg, rm->m_nents,
			DMA_TO_DEVICE);

	if (rm->m_rdma_op != NULL) {
		rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);

		/* If the user asked for a completion notification on this
		 * message, we can implement three different semantics:
		 * 1. Notify when we received the ACK on the RDS message
		 *    that was queued with the RDMA. This provides reliable
		 *    notification of RDMA status at the expense of a one-way
		 *    packet delay.
		 * 2. Notify when the IB stack gives us the completion event
		 *    for the RDMA operation.
		 * 3. Notify when the IB stack gives us the completion event
		 *    for the accompanying RDS messages.
		 * Here, we implement approach #3. To implement approach #2,
		 * call rds_rdma_send_complete from the cq_handler. To
		 * implement #1, don't call rds_rdma_send_complete at all,
		 * and fall back to the notify handling in the ACK processing
		 * code.
		 *
		 * Note: There's no need to explicitly sync any RDMA buffers
		 * using ib_dma_sync_sg_for_cpu - the completion for the RDMA
		 * operation itself unmapped the RDMA buffers, which takes
		 * care of synching.
		 */
		rds_ib_send_rdma_complete(rm, wc_status);

		if (rm->m_rdma_op->r_write)
			rds_stats_add(s_send_rdma_bytes,
				      rm->m_rdma_op->r_bytes);
		else
			rds_stats_add(s_recv_rdma_bytes,
				      rm->m_rdma_op->r_bytes);
	}

	/* If anyone waited for this message to get flushed out, wake
	 * them up now */
	rds_message_unmapped(rm);

	rds_message_put(rm);
	send->s_rm = NULL;
}
void rds_remove_bound(struct rds_sock *rs)
{
	if (!rs->rs_bound_addr)
		return;

	rdsdebug("rs %p unbinding from %pI4:%d\n",
		 rs, &rs->rs_bound_addr, ntohs(rs->rs_bound_port));

	rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
	rds_sock_put(rs);
	rs->rs_bound_addr = 0;
}
/* returns -ve errno or +ve port */
static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
{
	int ret = -EADDRINUSE;
	u16 rover, last;
	u64 key;

	if (*port != 0) {
		rover = be16_to_cpu(*port);
		if (rover == RDS_FLAG_PROBE_PORT)
			return -EINVAL;
		last = rover;
	} else {
		rover = max_t(u16, prandom_u32(), 2);
		last = rover - 1;
	}

	do {
		if (rover == 0)
			rover++;

		if (rover == RDS_FLAG_PROBE_PORT)
			continue;
		key = ((u64)addr << 32) | cpu_to_be16(rover);
		if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
			continue;

		rs->rs_bound_key = key;
		rs->rs_bound_addr = addr;
		net_get_random_once(&rs->rs_hash_initval,
				    sizeof(rs->rs_hash_initval));
		rs->rs_bound_port = cpu_to_be16(rover);
		rs->rs_bound_node.next = NULL;
		rds_sock_addref(rs);
		if (!rhashtable_insert_fast(&bind_hash_table,
					    &rs->rs_bound_node, ht_parms)) {
			*port = rs->rs_bound_port;
			ret = 0;
			rdsdebug("rs %p binding to %pI4:%d\n",
				 rs, &addr, (int)ntohs(*port));
			break;
		} else {
			rds_sock_put(rs);
			ret = -ENOMEM;
			break;
		}
	} while (rover++ != last);

	return ret;
}
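/*
 * Illustration only, not kernel code: the wrapping port search from
 * rds_add_bound() above. Start at a random 16-bit rover (never 0 or
 * 1), walk the whole space once with wraparound, and skip port 0 and
 * the reserved probe port. is_port_taken() is a hypothetical stand-in
 * for the rhashtable lookup, and the probe port value is made up.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int is_port_taken(uint16_t port)
{
	return port < 4;	/* pretend the lowest ports are bound */
}

int main(void)
{
	uint16_t rover = (uint16_t)rand() | 2;	/* at least 2 */
	uint16_t last = rover - 1;		/* one full cycle */
	const uint16_t probe_port = 5001;	/* hypothetical reserved port */
	int bound = 0;

	do {
		if (rover == 0)
			rover++;
		if (rover == probe_port)
			continue;	/* continue still runs rover++ below */
		if (!is_port_taken(rover)) {
			bound = 1;
			break;
		}
	} while (rover++ != last);

	printf("%s port %u\n", bound ? "bound" : "no free", rover);
	return 0;
}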
void rds_page_exit(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct rds_page_remainder *rem;

		rem = &per_cpu(rds_page_remainders, cpu);
		rdsdebug("cpu %u\n", cpu);

		if (rem->r_page)
			__free_page(rem->r_page);
		rem->r_page = NULL;
	}
}
/*
 * Return the rds_sock bound at the given local address.
 *
 * The rx path can race with rds_release. We notice if rds_release() has
 * marked this socket and don't return a rs ref to the rx path.
 */
struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
{
	u64 key = ((u64)addr << 32) | port;
	struct rds_sock *rs;

	rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
		rds_sock_addref(rs);
	else
		rs = NULL;

	rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, ntohs(port));

	return rs;
}
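/*
 * Illustration only, not kernel code: how the u64 lookup key used by
 * rds_find_bound()/rds_add_bound() above packs the 32-bit address into
 * the high half and the 16-bit (big-endian) port into the low half, so
 * a single rhashtable lookup matches both. Values are arbitrary
 * examples.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t rds_key_sketch(uint32_t addr, uint16_t port_be)
{
	return ((uint64_t)addr << 32) | port_be;
}

int main(void)
{
	uint32_t addr = 0xc0a80001;	/* 192.168.0.1 as an example */
	uint16_t port_be = 0x3039;	/* example port bytes */

	printf("key %#llx\n",
	       (unsigned long long)rds_key_sketch(addr, port_be));
	return 0;
}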