Example #1
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	long idle_time = 0;

	if (xprt_connected(xprt))
		idle_time = (long)(jiffies - xprt->last_used) / HZ;

	seq_printf(seq,
	  "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
	  "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",

	   0,	/* need a local port? */
	   xprt->stat.bind_count,
	   xprt->stat.connect_count,
	   xprt->stat.connect_time,
	   idle_time,
	   xprt->stat.sends,
	   xprt->stat.recvs,
	   xprt->stat.bad_xids,
	   xprt->stat.req_u,
	   xprt->stat.bklog_u,

	   r_xprt->rx_stats.read_chunk_count,
	   r_xprt->rx_stats.write_chunk_count,
	   r_xprt->rx_stats.reply_chunk_count,
	   r_xprt->rx_stats.total_rdma_request,
	   r_xprt->rx_stats.total_rdma_reply,
	   r_xprt->rx_stats.pullup_copy_count,
	   r_xprt->rx_stats.fixup_copy_count,
	   r_xprt->rx_stats.hardway_register_count,
	   r_xprt->rx_stats.failed_marshal_count,
	   r_xprt->rx_stats.bad_reply_count);
}
Example #2
/**
 * xprt_rdma_send_request - marshal and send an RPC request
 * @rqst: RPC message in rq_snd_buf
 *
 * Caller holds the transport's write lock.
 *
 * Returns:
 *	%0 if the RPC message has been sent
 *	%-ENOTCONN if the caller should reconnect and call again
 *	%-EAGAIN if the caller should call again
 *	%-ENOBUFS if the caller should call again after a delay
 *	%-EIO if a permanent error occurred and the request was not
 *		sent. Do not try to send this message again.
 */
static int
xprt_rdma_send_request(struct rpc_rqst *rqst)
{
	struct rpc_xprt *xprt = rqst->rq_xprt;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	int rc = 0;

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	if (unlikely(!rqst->rq_buffer))
		return xprt_rdma_bc_send_reply(rqst);
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

	if (!xprt_connected(xprt))
		goto drop_connection;

	if (!xprt_request_get_cong(xprt, rqst))
		return -EBADSLT;

	rc = rpcrdma_marshal_req(r_xprt, rqst);
	if (rc < 0)
		goto failed_marshal;

	/* Must suppress retransmit to maintain credits */
	if (rqst->rq_connect_cookie == xprt->connect_cookie)
		goto drop_connection;
	rqst->rq_xtime = ktime_get();

	__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
		goto drop_connection;

	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
	rqst->rq_bytes_sent = 0;

	/* An RPC with no reply will throw off credit accounting,
	 * so drop the connection to reset the credit grant.
	 */
	if (!rpc_reply_expected(rqst->rq_task))
		goto drop_connection;
	return 0;

failed_marshal:
	if (rc != -ENOTCONN)
		return rc;
drop_connection:
	xprt_disconnect_done(xprt);
	return -ENOTCONN;	/* implies disconnect */
}
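
The return-code table in the comment above leaves reconnect and retry policy to the caller (in the kernel, the generic RPC scheduler). A hedged sketch of such a dispatch loop; reconnect_transport() and backoff_delay() are hypothetical stand-ins, not kernel APIs:

static int submit_with_retries(struct rpc_rqst *rqst, int max_tries)
{
	int tries, rc;

	for (tries = 0; tries < max_tries; tries++) {
		rc = xprt_rdma_send_request(rqst);
		switch (rc) {
		case 0:
			return 0;		/* sent */
		case -ENOTCONN:
			reconnect_transport(rqst->rq_xprt);	/* hypothetical */
			continue;		/* then call again */
		case -ENOBUFS:
			backoff_delay();	/* hypothetical: wait a bit */
			continue;		/* call again after the delay */
		case -EAGAIN:
			continue;		/* call again right away */
		default:
			return rc;		/* permanent (e.g. -EIO): give up */
		}
	}
	return -ETIMEDOUT;
}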
Example #3
/**
 * xs_tcp_print_stats - display TCP socket-specific stats
 * @xprt: rpc_xprt struct containing statistics
 * @seq: output file
 *
 */
static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
    long idle_time = 0;

    if (xprt_connected(xprt))
        idle_time = (long)(jiffies - xprt->last_used) / HZ;

    seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
               xprt->port,
               xprt->stat.bind_count,
               xprt->stat.connect_count,
               xprt->stat.connect_time,
               idle_time,
               xprt->stat.sends,
               xprt->stat.recvs,
               xprt->stat.bad_xids,
               xprt->stat.req_u,
               xprt->stat.bklog_u);
}
Example #4
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	long idle_time = 0;

	if (xprt_connected(xprt))
		idle_time = (long)(jiffies - xprt->last_used) / HZ;

	seq_puts(seq, "\txprt:\trdma ");
	seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ",
		   0,	/* need a local port? */
		   xprt->stat.bind_count,
		   xprt->stat.connect_count,
		   xprt->stat.connect_time / HZ,
		   idle_time,
		   xprt->stat.sends,
		   xprt->stat.recvs,
		   xprt->stat.bad_xids,
		   xprt->stat.req_u,
		   xprt->stat.bklog_u);
	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ",
		   r_xprt->rx_stats.read_chunk_count,
		   r_xprt->rx_stats.write_chunk_count,
		   r_xprt->rx_stats.reply_chunk_count,
		   r_xprt->rx_stats.total_rdma_request,
		   r_xprt->rx_stats.total_rdma_reply,
		   r_xprt->rx_stats.pullup_copy_count,
		   r_xprt->rx_stats.fixup_copy_count,
		   r_xprt->rx_stats.hardway_register_count,
		   r_xprt->rx_stats.failed_marshal_count,
		   r_xprt->rx_stats.bad_reply_count,
		   r_xprt->rx_stats.nomsg_call_count);
	seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
		   r_xprt->rx_stats.mrs_recycled,
		   r_xprt->rx_stats.mrs_orphaned,
		   r_xprt->rx_stats.mrs_allocated,
		   r_xprt->rx_stats.local_inv_needed,
		   r_xprt->rx_stats.empty_sendctx_q,
		   r_xprt->rx_stats.reply_waits_for_send);
}
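
The printer above emits one space-separated line tagged "xprt:\trdma", which shows up in /proc/self/mountstats for an NFS/RDMA mount. A small user-space sketch that recovers the generic prefix of such a line; the field order is an assumption taken from the first seq_printf() above:

#include <stdio.h>

/* Sketch: parse the generic fields of an "xprt: rdma ..." stats line. */
int parse_rdma_xprt_line(const char *line)
{
	unsigned int port;
	unsigned long binds, connects, connect_secs, sends, recvs;
	long idle_secs;

	if (sscanf(line, " xprt: rdma %u %lu %lu %lu %ld %lu %lu",
		   &port, &binds, &connects, &connect_secs,
		   &idle_secs, &sends, &recvs) != 7)
		return -1;	/* not an rdma xprt line */
	printf("connects=%lu idle=%lds sends=%lu recvs=%lu\n",
	       connects, idle_secs, sends, recvs);
	return 0;
}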
Example #5
static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
{
    struct rpc_xprt *xprt = rd_desc->arg.data;
    skb_reader_t desc = {
        .skb	= skb,
        .offset	= offset,
        .count	= len,
        .csum	= 0
    };

    dprintk("RPC:      xs_tcp_data_recv started\n");
    do {
        /* Read in a new fragment marker if necessary */
        /* Can we ever really expect to get completely empty fragments? */
        if (xprt->tcp_flags & XPRT_COPY_RECM) {
            xs_tcp_read_fraghdr(xprt, &desc);
            continue;
        }
        /* Read in the xid if necessary */
        if (xprt->tcp_flags & XPRT_COPY_XID) {
            xs_tcp_read_xid(xprt, &desc);
            continue;
        }
        /* Read in the request data */
        if (xprt->tcp_flags & XPRT_COPY_DATA) {
            xs_tcp_read_request(xprt, &desc);
            continue;
        }
        /* Skip over any trailing bytes on short reads */
        xs_tcp_read_discard(xprt, &desc);
    } while (desc.count);
    dprintk("RPC:      xs_tcp_data_recv done\n");
    return len - desc.count;
}
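
Each pass through the loop above peels one piece off the TCP byte stream: the 4-byte record marker, then the XID, then request data, with the XPRT_COPY_* flags remembering where the parser stopped. A self-contained user-space sketch of the first two steps; unlike the incremental kernel code, it assumes the header bytes are already contiguous in a buffer:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define RPC_LAST_FRAG 0x80000000u

/* Returns bytes consumed (8), or 0 if the header is still incomplete. */
static size_t read_record_header(const uint8_t *buf, size_t count,
                                 uint32_t *reclen, int *last, uint32_t *xid)
{
    uint32_t marker;

    if (count < 8)
        return 0;                       /* wait for more bytes */
    memcpy(&marker, buf, 4);
    marker = ntohl(marker);
    *last = !!(marker & RPC_LAST_FRAG); /* high bit: last fragment */
    *reclen = marker & ~RPC_LAST_FRAG;  /* low 31 bits: fragment length */
    memcpy(xid, buf + 4, 4);            /* XID is left in network order */
    return 8;
}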

/**
 * xs_tcp_data_ready - "data ready" callback for TCP sockets
 * @sk: socket with data to read
 * @bytes: how much data to read
 *
 */
static void xs_tcp_data_ready(struct sock *sk, int bytes)
{
    struct rpc_xprt *xprt;
    read_descriptor_t rd_desc;

    read_lock(&sk->sk_callback_lock);
    dprintk("RPC:      xs_tcp_data_ready...\n");
    if (!(xprt = xprt_from_sock(sk)))
        goto out;
    if (xprt->shutdown)
        goto out;

    /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
    rd_desc.arg.data = xprt;
    rd_desc.count = 65536;
    tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
out:
    read_unlock(&sk->sk_callback_lock);
}

/**
 * xs_tcp_state_change - callback to handle TCP socket state changes
 * @sk: socket whose state has changed
 *
 */
static void xs_tcp_state_change(struct sock *sk)
{
    struct rpc_xprt *xprt;

    read_lock(&sk->sk_callback_lock);
    if (!(xprt = xprt_from_sock(sk)))
        goto out;
    dprintk("RPC:      xs_tcp_state_change client %p...\n", xprt);
    dprintk("RPC:      state %x conn %d dead %d zapped %d\n",
            sk->sk_state, xprt_connected(xprt),
            sock_flag(sk, SOCK_DEAD),
            sock_flag(sk, SOCK_ZAPPED));

    switch (sk->sk_state) {
    case TCP_ESTABLISHED:
        spin_lock_bh(&xprt->transport_lock);
        if (!xprt_test_and_set_connected(xprt)) {
            /* Reset TCP record info */
            xprt->tcp_offset = 0;
            xprt->tcp_reclen = 0;
            xprt->tcp_copied = 0;

            if (xprt->tcp_flags & XPRT_SRCADDR_PRESENT)
                xprt->tcp_flags = XPRT_SRCADDR_PRESENT |
                                  XPRT_COPY_RECM |
                                  XPRT_COPY_XID;
            else
                xprt->tcp_flags = XPRT_COPY_RECM |
                                  XPRT_COPY_XID;

            xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
            xprt_wake_pending_tasks(xprt, 0);
        }
        spin_unlock_bh(&xprt->transport_lock);
        break;
    case TCP_SYN_SENT:
    case TCP_SYN_RECV:
        break;
    case TCP_CLOSE_WAIT:
        /* Try to schedule an autoclose RPC call */
        set_bit(XPRT_CLOSE_WAIT, &xprt->state);
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
            queue_work(rpciod_workqueue, &xprt->task_cleanup);
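        /* fall through to disconnect */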
    default:
        xprt_disconnect(xprt);
    }
out:
    read_unlock(&sk->sk_callback_lock);
}

/**
 * xs_udp_write_space - callback invoked when socket buffer space
 *                             becomes available
 * @sk: socket whose state has changed
 *
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
 * with a bunch of small requests.
 */
static void xs_udp_write_space(struct sock *sk)
{
    read_lock(&sk->sk_callback_lock);

    /* from net/core/sock.c:sock_def_write_space */
    if (sock_writeable(sk)) {
        struct socket *sock;
        struct rpc_xprt *xprt;

        if (unlikely(!(sock = sk->sk_socket)))
            goto out;
        if (unlikely(!(xprt = xprt_from_sock(sk))))
            goto out;
        if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)))
            goto out;

        xprt_write_space(xprt);
    }

out:
    read_unlock(&sk->sk_callback_lock);
}
Example #6
static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
{
    struct kvec iov = {
        .iov_base	= xdr->head[0].iov_base + base,
        .iov_len	= len - base,
    };
    struct msghdr msg = {
        .msg_name	= addr,
        .msg_namelen	= addrlen,
        .msg_flags	= XS_SENDMSG_FLAGS,
    };

    if (xdr->len > len)
        msg.msg_flags |= MSG_MORE;

    if (likely(iov.iov_len))
        return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
    return kernel_sendmsg(sock, &msg, NULL, 0, 0);
}

static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len)
{
    struct kvec iov = {
        .iov_base	= xdr->tail[0].iov_base + base,
        .iov_len	= len - base,
    };
    struct msghdr msg = {
        .msg_flags	= XS_SENDMSG_FLAGS,
    };

    return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
}

/**
 * xs_sendpages - write pages directly to a socket
 * @sock: socket to send on
 * @addr: UDP only -- address of destination
 * @addrlen: UDP only -- length of destination address
 * @xdr: buffer containing this request
 * @base: starting position in the buffer
 *
 */
static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
{
    struct page **ppage = xdr->pages;
    unsigned int len, pglen = xdr->page_len;
    int err, ret = 0;
    ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);

    if (unlikely(!sock))
        return -ENOTCONN;

    clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);

    len = xdr->head[0].iov_len;
    if (base < len || (addr != NULL && base == 0)) {
        err = xs_send_head(sock, addr, addrlen, xdr, base, len);
        if (ret == 0)
            ret = err;
        else if (err > 0)
            ret += err;
        if (err != (len - base))
            goto out;
        base = 0;
    } else
        base -= len;

    if (unlikely(pglen == 0))
        goto copy_tail;
    if (unlikely(base >= pglen)) {
        base -= pglen;
        goto copy_tail;
    }
    if (base || xdr->page_base) {
        pglen -= base;
        base += xdr->page_base;
        ppage += base >> PAGE_CACHE_SHIFT;
        base &= ~PAGE_CACHE_MASK;
    }

    sendpage = kernel_sendpage;
    do {
        int flags = XS_SENDMSG_FLAGS;

        len = PAGE_CACHE_SIZE;
        if (base)
            len -= base;
        if (pglen < len)
            len = pglen;

        if (pglen != len || xdr->tail[0].iov_len != 0)
            flags |= MSG_MORE;

        /* Hmm... We might be dealing with highmem pages */
        if (PageHighMem(*ppage))
            sendpage = sock_no_sendpage;
        err = sendpage(sock, *ppage, base, len, flags);
        if (ret == 0)
            ret = err;
        else if (err > 0)
            ret += err;
        if (err != len)
            goto out;
        base = 0;
        ppage++;
    } while ((pglen -= len) != 0);
copy_tail:
    len = xdr->tail[0].iov_len;
    if (base < len) {
        err = xs_send_tail(sock, xdr, base, len);
        if (ret == 0)
            ret = err;
        else if (err > 0)
            ret += err;
    }
out:
    return ret;
}
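
The walk above resumes at "base" bytes into the whole xdr_buf after a partial send: whichever of head, pages, and tail are already on the wire get skipped, and the first unsent region is entered mid-way. A simplified user-space analog over plain iovecs (no page handling), assuming a connected socket:

#include <sys/socket.h>
#include <sys/uio.h>

/* Sketch: send an iovec array, skipping "base" already-sent bytes. */
static ssize_t send_from_offset(int fd, const struct iovec *iov,
                                int iovcnt, size_t base)
{
    struct msghdr msg = { 0 };
    struct iovec local[8];
    int i, n = 0;

    for (i = 0; i < iovcnt && n < 8; i++) {
        if (base >= iov[i].iov_len) {   /* region fully sent: skip it */
            base -= iov[i].iov_len;
            continue;
        }
        local[n].iov_base = (char *)iov[i].iov_base + base;
        local[n].iov_len = iov[i].iov_len - base;
        base = 0;                       /* later regions start at 0 */
        n++;
    }
    msg.msg_iov = local;
    msg.msg_iovlen = n;
    return sendmsg(fd, &msg, MSG_NOSIGNAL);
}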

/**
 * xs_nospace - place task on wait queue if transmit was incomplete
 * @task: task to put to sleep
 *
 */
static void xs_nospace(struct rpc_task *task)
{
    struct rpc_rqst *req = task->tk_rqstp;
    struct rpc_xprt *xprt = req->rq_xprt;

    dprintk("RPC: %4d xmit incomplete (%u left of %u)\n",
            task->tk_pid, req->rq_slen - req->rq_bytes_sent,
            req->rq_slen);

    if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
        /* Protect against races with write_space */
        spin_lock_bh(&xprt->transport_lock);

        /* Don't race with disconnect */
        if (!xprt_connected(xprt))
            task->tk_status = -ENOTCONN;
        else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags))
            xprt_wait_for_buffer_space(task);

        spin_unlock_bh(&xprt->transport_lock);
    } else
        /* Keep holding the socket if it is blocked */
        rpc_delay(task, HZ>>4);
}

/**
 * xs_udp_send_request - write an RPC request to a UDP socket
 * @task: address of RPC task that manages the state of an RPC request
 *
 * Return values:
 *        0:	The request has been sent
 *   EAGAIN:	The socket was blocked, please call again later to
 *		complete the request
 * ENOTCONN:	Caller needs to invoke connect logic then call again
 *    other:	Some other error occurred, the request was not sent
 */
static int xs_udp_send_request(struct rpc_task *task)
{
    struct rpc_rqst *req = task->tk_rqstp;
    struct rpc_xprt *xprt = req->rq_xprt;
    struct xdr_buf *xdr = &req->rq_snd_buf;
    int status;

    xs_pktdump("packet data:",
               req->rq_svec->iov_base,
               req->rq_svec->iov_len);

    req->rq_xtime = jiffies;
    status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
                          sizeof(xprt->addr), xdr, req->rq_bytes_sent);

    dprintk("RPC:      xs_udp_send_request(%u) = %d\n",
            xdr->len - req->rq_bytes_sent, status);

    if (likely(status >= (int) req->rq_slen))
        return 0;

    /* Still some bytes left; set up for a retry later. */
    if (status > 0)
        status = -EAGAIN;

    switch (status) {
    case -ENETUNREACH:
    case -EPIPE:
    case -ECONNREFUSED:
        /* When the server has died, an ICMP port unreachable message
         * prompts ECONNREFUSED. */
        break;
    case -EAGAIN:
        xs_nospace(task);
        break;
    default:
        dprintk("RPC:      sendmsg returned unrecognized error %d\n",
                -status);
        break;
    }

    return status;
}

static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
{
    u32 reclen = buf->len - sizeof(rpc_fraghdr);
    rpc_fraghdr *base = buf->head[0].iov_base;
    *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
}
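
The marker overwrites the first four bytes of the head iovec, which is why reclen subtracts sizeof(rpc_fraghdr) from the total length; the high bit flags the last (here, only) fragment of the record. A hedged user-space equivalent that frames one complete message for a connected stream socket:

#include <stdint.h>
#include <arpa/inet.h>
#include <sys/uio.h>

#define RPC_LAST_FRAG 0x80000000u

/* Sketch: prepend a last-fragment record marker and send the message.
 * A real sender must loop on short writes; this one does not.
 */
static ssize_t send_framed(int fd, const void *body, uint32_t len)
{
    uint32_t marker = htonl(RPC_LAST_FRAG | len);
    struct iovec iov[2] = {
        { .iov_base = &marker,      .iov_len = sizeof(marker) },
        { .iov_base = (void *)body, .iov_len = len },
    };

    return writev(fd, iov, 2);
}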
Example #7
/**
 * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
 * @args: RPC transport to connect
 *
 * Invoked by a work queue tasklet.
 */
static void xs_tcp_connect_worker(void *args)
{
    struct rpc_xprt *xprt = (struct rpc_xprt *)args;
    struct socket *sock = xprt->sock;
    int err, status = -EIO;

    if (xprt->shutdown || xprt->addr.sin_port == 0)
        goto out;

    dprintk("RPC:      xs_tcp_connect_worker for xprt %p\n", xprt);

    if (!xprt->sock) {
        /* start from scratch */
        if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
            dprintk("RPC:      can't create TCP transport socket (%d).\n", -err);
            goto out;
        }
        xs_reclassify_socket(sock);

        if (xs_bind(xprt, sock)) {
            sock_release(sock);
            goto out;
        }
    } else
        /* "close" the socket, preserving the local port */
        xs_tcp_reuse_connection(xprt);

    if (!xprt->inet) {
        struct sock *sk = sock->sk;

        write_lock_bh(&sk->sk_callback_lock);

        sk->sk_user_data = xprt;
        xprt->old_data_ready = sk->sk_data_ready;
        xprt->old_state_change = sk->sk_state_change;
        xprt->old_write_space = sk->sk_write_space;
        sk->sk_data_ready = xs_tcp_data_ready;
        sk->sk_state_change = xs_tcp_state_change;
        sk->sk_write_space = xs_tcp_write_space;
        sk->sk_allocation = GFP_ATOMIC;

        /* socket options */
        sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
        sock_reset_flag(sk, SOCK_LINGER);
        tcp_sk(sk)->linger2 = 0;
        tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;

        xprt_clear_connected(xprt);

        /* Reset to new socket */
        xprt->sock = sock;
        xprt->inet = sk;

        write_unlock_bh(&sk->sk_callback_lock);
    }

    /* Tell the socket layer to start connecting... */
    xprt->stat.connect_count++;
    xprt->stat.connect_start = jiffies;
    status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
                            sizeof(xprt->addr), O_NONBLOCK);
    dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
            xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
    if (status < 0) {
        switch (status) {
        case -EINPROGRESS:
        case -EALREADY:
            goto out_clear;
        case -ECONNREFUSED:
        case -ECONNRESET:
            /* retry with existing socket, after a delay */
            break;
        case -ENETUNREACH:
            status = -ENOTCONN;
            break;
        default:
            /* get rid of existing socket, and retry */
            xs_close(xprt);
            break;
        }
    }
out:
    xprt_wake_pending_tasks(xprt, status);
out_clear:
    xprt_clear_connecting(xprt);
}
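
The status switch above is the classic nonblocking-connect pattern: EINPROGRESS and EALREADY mean the handshake is still running, connection-level errors are retried on the same socket after a delay, and anything else discards the socket. A user-space sketch of the same pattern, with the handshake completed via POLLOUT and SO_ERROR:

#include <errno.h>
#include <sys/socket.h>

/* Sketch: start a nonblocking connect and classify the result.
 * Returns 0 when connected, -EINPROGRESS while pending, else -errno.
 */
static int start_connect(int fd, const struct sockaddr *addr, socklen_t len)
{
    if (connect(fd, addr, len) == 0)
        return 0;
    if (errno == EINPROGRESS || errno == EALREADY)
        return -EINPROGRESS;            /* poll for POLLOUT, then check */
    return -errno;                      /* caller decides: retry or close */
}

/* After poll() reports POLLOUT, SO_ERROR holds the final status. */
static int finish_connect(int fd)
{
    int err = 0;
    socklen_t elen = sizeof(err);

    if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &elen) < 0)
        return -errno;
    return err ? -err : 0;
}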