static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, struct rds_ib_send_work *send, int wc_status) { struct rds_message *rm = send->s_rm; rdsdebug("ic %p send %p rm %p\n", ic, send, rm); ib_dma_unmap_sg(ic->i_cm_id->device, rm->m_sg, rm->m_nents, DMA_TO_DEVICE); if (rm->m_rdma_op != NULL) { rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); /* If the user asked for a completion notification on this * message, we can implement three different semantics: * 1. Notify when we received the ACK on the RDS message * that was queued with the RDMA. This provides reliable * notification of RDMA status at the expense of a one-way * packet delay. * 2. Notify when the IB stack gives us the completion event for * the RDMA operation. * 3. Notify when the IB stack gives us the completion event for * the accompanying RDS messages. * Here, we implement approach #3. To implement approach #2, * call rds_rdma_send_complete from the cq_handler. To implement #1, * don't call rds_rdma_send_complete at all, and fall back to the notify * handling in the ACK processing code. * * Note: There's no need to explicitly sync any RDMA buffers using * ib_dma_sync_sg_for_cpu - the completion for the RDMA * operation itself unmapped the RDMA buffers, which takes care * of synching. */ rds_ib_send_rdma_complete(rm, wc_status); if (rm->m_rdma_op->r_write) rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); else rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); } /* If anyone waited for this message to get flushed out, wake * them up now */ rds_message_unmapped(rm); rds_message_put(rm); send->s_rm = NULL; }
/*
 * Copy @bytes between @page (starting at @offset) and the user buffer
 * @ptr.  A non-zero @to_user copies page -> user, zero copies
 * user -> page.
 *
 * Returns 0 on success, or -EFAULT when the user copy reports a non-zero
 * result.
 *
 * flush_dcache_page() is not a concern here because this only operates on
 * private pages.  Were we to do, say, directed receive into pinned user
 * pages, cache coherence would need more thought (though the
 * flush_dcache_page() in get_user_pages() would probably be enough).
 */
int rds_page_copy_user(struct page *page, unsigned long offset,
		       void __user *ptr, unsigned long bytes,
		       int to_user)
{
	unsigned long not_copied;
	void *kaddr = kmap(page);

	if (!to_user) {
		rds_stats_add(s_copy_from_user, bytes);
		not_copied = copy_from_user(kaddr + offset, ptr, bytes);
	} else {
		rds_stats_add(s_copy_to_user, bytes);
		not_copied = copy_to_user(ptr, kaddr + offset, bytes);
	}

	kunmap(page);

	if (not_copied)
		return -EFAULT;

	return 0;
}
static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, struct rm_rdma_op *op, int wc_status) { if (op->op_mapped) { ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, op->op_nents, op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); op->op_mapped = 0; } /* If the user asked for a completion notification on this * message, we can implement three different semantics: * 1. Notify when we received the ACK on the RDS message * that was queued with the RDMA. This provides reliable * notification of RDMA status at the expense of a one-way * packet delay. * 2. Notify when the IB stack gives us the completion event for * the RDMA operation. * 3. Notify when the IB stack gives us the completion event for * the accompanying RDS messages. * Here, we implement approach #3. To implement approach #2, * we would need to take an event for the rdma WR. To implement #1, * don't call rds_rdma_send_complete at all, and fall back to the notify * handling in the ACK processing code. * * Note: There's no need to explicitly sync any RDMA buffers using * ib_dma_sync_sg_for_cpu - the completion for the RDMA * operation itself unmapped the RDMA buffers, which takes care * of synching. */ rds_ib_send_complete(container_of(op, struct rds_message, rdma), wc_status, rds_rdma_send_complete); if (op->op_write) rds_stats_add(s_send_rdma_bytes, op->op_bytes); else rds_stats_add(s_recv_rdma_bytes, op->op_bytes); }
/*
 * Apply @delta to the socket's received-byte count and update the
 * congestion state of @port in @map when a threshold is crossed.
 *
 * A zero @delta is a no-op.  A positive @delta is counted as bytes added
 * to the socket, a negative one as bytes removed.
 */
static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
				  struct rds_cong_map *map,
				  int delta, __be16 port)
{
	int now_congested;

	if (!delta)
		return;

	rs->rs_rcv_bytes += delta;

	if (delta < 0)
		rds_stats_add(s_recv_bytes_removed_from_socket, -delta);
	else
		rds_stats_add(s_recv_bytes_added_to_socket, delta);

	now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);

	rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
		 "now_cong %d delta %d\n",
		 rs, &rs->rs_bound_addr,
		 ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
		 rds_sk_rcvbuf(rs), now_congested, delta);

	if (!rs->rs_congested) {
		/* Not congested before: flag it once we exceed the buffer. */
		if (now_congested) {
			rs->rs_congested = 1;
			rds_cong_set_bit(map, port);
			rds_cong_queue_updates(map);
		}
		return;
	}

	/*
	 * Congested before: only report uncongested once usage has dropped
	 * below half the buffer.  Demanding that extra free space keeps the
	 * state from bouncing between congested and uncongested too often.
	 */
	if (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs) / 2)) {
		rs->rs_congested = 0;
		rds_cong_clear_bit(map, port);
		rds_cong_queue_updates(map);
	}

	/* Otherwise the congestion state is unchanged. */
}
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) { unsigned long to_copy, nbytes; unsigned long sg_off; struct scatterlist *sg; int ret = 0; rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); /* * now allocate and copy in the data payload. */ sg = rm->data.op_sg; sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ while (iov_iter_count(from)) { if (!sg_page(sg)) { ret = rds_page_remainder_alloc(sg, iov_iter_count(from), GFP_HIGHUSER); if (ret) return ret; rm->data.op_nents++; sg_off = 0; } to_copy = min_t(unsigned long, iov_iter_count(from), sg->length - sg_off); rds_stats_add(s_copy_from_user, to_copy); nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, to_copy, from); if (nbytes != to_copy) return -EFAULT; sg_off += to_copy; if (sg_off == sg->length) sg++; } return ret; }
int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) { struct rds_message *rm; struct scatterlist *sg; unsigned long to_copy; unsigned long vec_off; int copied; int ret; u32 len; rm = container_of(inc, struct rds_message, m_inc); len = be32_to_cpu(rm->m_inc.i_hdr.h_len); sg = rm->data.op_sg; vec_off = 0; copied = 0; while (iov_iter_count(to) && copied < len) { to_copy = min_t(unsigned long, iov_iter_count(to), sg->length - vec_off); to_copy = min_t(unsigned long, to_copy, len - copied); rds_stats_add(s_copy_to_user, to_copy); ret = copy_page_to_iter(sg_page(sg), sg->offset + vec_off, to_copy, to); if (ret != to_copy) return -EFAULT; vec_off += to_copy; copied += to_copy; if (vec_off == sg->length) { vec_off = 0; sg++; } } return copied; }