static void rds_ib_tasklet_fn_recv(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; struct rds_connection *conn = ic->conn; struct rds_ib_device *rds_ibdev = ic->rds_ibdev; struct rds_ib_ack_state state; if (!rds_ibdev) rds_conn_drop(conn); rds_ib_stats_inc(s_ib_tasklet_call); memset(&state, 0, sizeof(state)); poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); if (state.ack_next_valid) rds_ib_set_ack(ic, state.ack_next, state.ack_required); if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { rds_send_drop_acked(conn, state.ack_recv, NULL); ic->i_ack_recv = state.ack_recv; } if (rds_conn_up(conn)) rds_ib_attempt_ack(ic); }
void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) { struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id; struct rds_ib_frmr *frmr = &ibmr->u.frmr; if (wc->status != IB_WC_SUCCESS) { frmr->fr_state = FRMR_IS_STALE; if (rds_conn_up(ic->conn)) rds_ib_conn_error(ic->conn, "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n", &ic->conn->c_laddr, &ic->conn->c_faddr, wc->status, ib_wc_status_msg(wc->status), wc->vendor_err); } if (frmr->fr_inv) { frmr->fr_state = FRMR_IS_FREE; frmr->fr_inv = false; atomic_inc(&ic->i_fastreg_wrs); } else { atomic_inc(&ic->i_fastunreg_wrs); } }
int rds_tcp_conn_connect(struct rds_connection *conn) { struct socket *sock = NULL; struct sockaddr_in src, dest; int ret; struct rds_tcp_connection *tc = conn->c_transport_data; mutex_lock(&tc->t_conn_lock); if (rds_conn_up(conn)) { mutex_unlock(&tc->t_conn_lock); return 0; } ret = sock_create_kern(rds_conn_net(conn), PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) goto out; rds_tcp_tune(sock); src.sin_family = AF_INET; src.sin_addr.s_addr = (__force u32)conn->c_laddr; src.sin_port = (__force u16)htons(0); ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src)); if (ret) { rdsdebug("bind failed with %d at address %pI4\n", ret, &conn->c_laddr); goto out; } dest.sin_family = AF_INET; dest.sin_addr.s_addr = (__force u32)conn->c_faddr; dest.sin_port = (__force u16)htons(RDS_TCP_PORT); /* * once we call connect() we can start getting callbacks and they * own the socket */ rds_tcp_set_callbacks(sock, conn); ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), O_NONBLOCK); rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) ret = 0; if (ret == 0) { rds_tcp_keepalive(sock); sock = NULL; } else { rds_tcp_restore_callbacks(sock, conn->c_transport_data); } out: mutex_unlock(&tc->t_conn_lock); if (sock) sock_release(sock); return ret; }
static void rds_ib_tasklet_fn_send(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; struct rds_connection *conn = ic->conn; rds_ib_stats_inc(s_ib_tasklet_call); poll_scq(ic, ic->i_send_cq, ic->i_send_wc); ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); poll_scq(ic, ic->i_send_cq, ic->i_send_wc); if (rds_conn_up(conn) && (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || test_bit(0, &conn->c_map_queued))) rds_send_xmit(ic->conn); }
static void rds_ib_tasklet_fn_send(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; struct rds_connection *conn = ic->conn; rds_ib_stats_inc(s_ib_tasklet_call); /* if cq has been already reaped, ignore incoming cq event */ if (atomic_read(&ic->i_cq_quiesce)) return; poll_scq(ic, ic->i_send_cq, ic->i_send_wc); ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); poll_scq(ic, ic->i_send_cq, ic->i_send_wc); if (rds_conn_up(conn) && (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || test_bit(0, &conn->c_map_queued))) rds_send_xmit(&ic->conn->c_path[0]); }
/* * The _oldest/_free ring operations here race cleanly with the alloc/unalloc * operations performed in the send path. As the sender allocs and potentially * unallocs the next free entry in the ring it doesn't alter which is * the next to be freed, which is what this is concerned with. */ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) { struct rds_connection *conn = context; struct rds_ib_connection *ic = conn->c_transport_data; struct ib_wc wc; struct rds_ib_send_work *send; u32 completed; u32 oldest; u32 i = 0; int ret; rdsdebug("cq %p conn %p\n", cq, conn); rds_ib_stats_inc(s_ib_tx_cq_call); ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); if (ret) rdsdebug("ib_req_notify_cq send failed: %d\n", ret); while (ib_poll_cq(cq, 1, &wc) > 0) { rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", (unsigned long long)wc.wr_id, wc.status, wc.byte_len, be32_to_cpu(wc.ex.imm_data)); rds_ib_stats_inc(s_ib_tx_cq_event); if (wc.wr_id == RDS_IB_ACK_WR_ID) { if (ic->i_ack_queued + HZ/2 < jiffies) rds_ib_stats_inc(s_ib_tx_stalled); rds_ib_ack_send_complete(ic); continue; } oldest = rds_ib_ring_oldest(&ic->i_send_ring); completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); for (i = 0; i < completed; i++) { send = &ic->i_sends[oldest]; /* In the error case, wc.opcode sometimes contains garbage */ switch (send->s_wr.opcode) { case IB_WR_SEND: if (send->s_rm) rds_ib_send_unmap_rm(ic, send, wc.status); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: /* Nothing to be done - the SG list will be unmapped * when the SEND completes. */ break; default: if (printk_ratelimit()) printk(KERN_NOTICE "RDS/IB: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; } send->s_wr.opcode = 0xdead; send->s_wr.num_sge = 1; if (send->s_queued + HZ/2 < jiffies) rds_ib_stats_inc(s_ib_tx_stalled); /* If a RDMA operation produced an error, signal this right * away. If we don't, the subsequent SEND that goes with this * RDMA will be canceled with ERR_WFLUSH, and the application * never learn that the RDMA failed. */ if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { struct rds_message *rm; rm = rds_send_get_message(conn, send->s_op); if (rm) rds_ib_send_rdma_complete(rm, wc.status); } oldest = (oldest + 1) % ic->i_send_ring.w_nr; } rds_ib_ring_free(&ic->i_send_ring, completed); if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || test_bit(0, &conn->c_map_queued)) queue_delayed_work(rds_wq, &conn->c_send_w, 0); /* We expect errors as the qp is drained during shutdown */ if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { rds_ib_conn_error(conn, "send completion on %pI4 " "had status %u, disconnecting and reconnecting\n", &conn->c_faddr, wc.status); } } }