/* It shouldn't matter if the number of backchannel session slots
 * doesn't match the number of RPC/RDMA credits. That just means
 * one or the other will have extra slots that aren't used.
 */
static struct rpc_xprt *
xprt_setup_rdma_bc(struct xprt_create *args)
{
	struct rpc_xprt *xprt;
	struct rpcrdma_xprt *new_xprt;

	/* The peer address must fit in xprt->addr. */
	if (args->addrlen > sizeof(xprt->addr)) {
		dprintk("RPC: %s: address too large\n", __func__);
		return ERR_PTR(-EBADF);
	}

	/* Allocate the containing rpcrdma_xprt; both slot-table sizes are
	 * the backchannel maximum (see comment above). */
	xprt = xprt_alloc(args->net, sizeof(*new_xprt),
			  RPCRDMA_MAX_BC_REQUESTS,
			  RPCRDMA_MAX_BC_REQUESTS);
	if (!xprt) {
		dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
			__func__);
		return ERR_PTR(-ENOMEM);
	}

	xprt->timeout = &xprt_rdma_bc_timeout;
	/* Mark the transport bound and connected up front: the backchannel
	 * rides on the existing svc connection, no binding step occurs. */
	xprt_set_bound(xprt);
	xprt_set_connected(xprt);
	xprt->bind_timeout = RPCRDMA_BIND_TO;
	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

	xprt->prot = XPRT_TRANSPORT_BC_RDMA;
	xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
	xprt->ops = &xprt_rdma_bc_procs;

	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
	xprt->addrlen = args->addrlen;
	xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
	xprt->resvport = 0;

	xprt->max_payload = xprt_rdma_max_inline_read;

	new_xprt = rpcx_to_rdmax(xprt);
	new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;

	/* Reference taken on behalf of the xpt_bc_xprt back-pointer that is
	 * installed just below. */
	xprt_get(xprt);
	args->bc_xprt->xpt_bc_xprt = xprt;
	xprt->bc_xprt = args->bc_xprt;

	if (!try_module_get(THIS_MODULE))
		goto out_fail;

	/* Final put for backchannel xprt is in __svc_rdma_free */
	xprt_get(xprt);
	return xprt;

out_fail:
	/* Unwind: drop the formatted addresses, clear the svc xprt's
	 * back-pointers, release the reference taken above, then free the
	 * xprt itself. */
	xprt_rdma_free_addresses(xprt);
	args->bc_xprt->xpt_bc_xprt = NULL;
	args->bc_xprt->xpt_bc_xps = NULL;
	xprt_put(xprt);
	xprt_free(xprt);
	return ERR_PTR(-EINVAL);
}
/* Add @xprt to the switch @xps.  The "_locked" suffix indicates the caller
 * must already hold the lock serializing updates to the switch.  Takes its
 * own reference on @xprt; silently does nothing if xprt_get() fails
 * (transport already being torn down).
 */
static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt)
{
	if (unlikely(xprt_get(xprt) == NULL))
		return;
	list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list);
	/* NOTE(review): the barrier orders the RCU list insertion before the
	 * counter/namespace updates below — presumably for lockless readers
	 * of xps_nxprts; confirm against the read side. */
	smp_wmb();
	/* The first transport added determines the switch's net namespace. */
	if (xps->xps_nxprts == 0)
		xps->xps_net = xprt->xprt_net;
	xps->xps_nxprts++;
}
/* Repeatedly ask @fn for the iterator's next candidate transport until
 * either @fn runs out of candidates (returns NULL -> we return NULL) or a
 * candidate's refcount can be taken with xprt_get().  A candidate whose
 * xprt_get() fails is simply skipped.  Returns a referenced transport or
 * NULL.
 */
static struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi,
		struct rpc_xprt *(*fn)(struct rpc_xprt_iter *))
{
	for (;;) {
		struct rpc_xprt *candidate = fn(xpi);

		if (candidate == NULL)
			return NULL;
		candidate = xprt_get(candidate);
		if (candidate != NULL)
			return candidate;
	}
}
/* This function is called from the protocol layer accept() in order to
 * instanciate a new session on behalf of a given listener and frontend. It
 * returns a positive value upon success, 0 if the connection can be ignored,
 * or a negative value upon critical failure. The accepted file descriptor is
 * closed if we return <= 0. If no handshake is needed, it immediately tries
 * to instanciate a new stream. The created connection's owner points to the
 * new session until the upper layers are created.
 */
int session_accept_fd(struct listener *l, int cfd, struct sockaddr_storage *addr)
{
	struct connection *cli_conn;
	struct proxy *p = l->bind_conf->frontend;
	struct session *sess;
	int ret;

	ret = -1; /* assume unrecoverable error by default */

	if (unlikely((cli_conn = conn_new()) == NULL))
		goto out_close;

	/* Attach the listener's control and transport layers to the new
	 * connection, then fill in its addressing information. */
	conn_prepare(cli_conn, l->proto, l->bind_conf->xprt);

	cli_conn->handle.fd = cfd;
	cli_conn->addr.from = *addr;
	cli_conn->flags |= CO_FL_ADDR_FROM_SET;
	cli_conn->target = &l->obj_type;
	cli_conn->proxy_netns = l->netns;

	conn_ctrl_init(cli_conn);

	/* wait for a PROXY protocol header */
	if (l->options & LI_O_ACC_PROXY) {
		cli_conn->flags |= CO_FL_ACCEPT_PROXY;
		conn_sock_want_recv(cli_conn);
	}

	/* wait for a NetScaler client IP insertion protocol header */
	if (l->options & LI_O_ACC_CIP) {
		cli_conn->flags |= CO_FL_ACCEPT_CIP;
		conn_sock_want_recv(cli_conn);
	}

	conn_xprt_want_recv(cli_conn);
	if (conn_xprt_init(cli_conn) < 0)
		goto out_free_conn;

	/* Create the session; it becomes the connection's owner below. */
	sess = session_new(p, l, &cli_conn->obj_type);
	if (!sess)
		goto out_free_conn;

	conn_set_owner(cli_conn, sess, NULL);

	/* now evaluate the tcp-request layer4 rules. We only need a session
	 * and no stream for these rules.
	 */
	if ((l->options & LI_O_TCP_L4_RULES) && !tcp_exec_l4_rules(sess)) {
		/* let's do a no-linger now to close with a single RST. */
		setsockopt(cfd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
		ret = 0; /* successful termination */
		goto out_free_sess;
	}

	/* monitor-net and health mode are processed immediately after TCP
	 * connection rules. This way it's possible to block them, but they
	 * never use the lower data layers, they send directly over the socket,
	 * as they were designed for. We first flush the socket receive buffer
	 * in order to avoid emission of an RST by the system. We ignore any
	 * error.
	 */
	if (unlikely((p->mode == PR_MODE_HEALTH) ||
		     ((l->options & LI_O_CHK_MONNET) &&
		      addr->ss_family == AF_INET &&
		      (((struct sockaddr_in *)addr)->sin_addr.s_addr & p->mon_mask.s_addr) == p->mon_net.s_addr))) {
		/* we have 4 possibilities here :
		 *  - HTTP mode, from monitoring address => send "HTTP/1.0 200 OK"
		 *  - HEALTH mode with HTTP check => send "HTTP/1.0 200 OK"
		 *  - HEALTH mode without HTTP check => just send "OK"
		 *  - TCP mode from monitoring address => just close
		 */
		if (l->proto->drain)
			l->proto->drain(cfd);
		if (p->mode == PR_MODE_HTTP ||
		    (p->mode == PR_MODE_HEALTH && (p->options2 & PR_O2_CHK_ANY) == PR_O2_HTTP_CHK))
			send(cfd, "HTTP/1.0 200 OK\r\n\r\n", 19, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_MORE);
		else if (p->mode == PR_MODE_HEALTH)
			send(cfd, "OK\n", 3, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_MORE);
		ret = 0;
		goto out_free_sess;
	}

	/* Adjust some socket options */
	if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6) {
		setsockopt(cfd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one));

		if (p->options & PR_O_TCP_CLI_KA)
			setsockopt(cfd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));

		if (p->options & PR_O_TCP_NOLING)
			fdtab[cfd].linger_risk = 1;

#if defined(TCP_MAXSEG)
		if (l->maxseg < 0) {
			/* we just want to reduce the current MSS by that value */
			int mss;
			socklen_t mss_len = sizeof(mss);
			if (getsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, &mss_len) == 0) {
				mss += l->maxseg; /* remember, it's < 0 */
				setsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss));
			}
		}
#endif
	}

	if (global.tune.client_sndbuf)
		setsockopt(cfd, SOL_SOCKET, SO_SNDBUF, &global.tune.client_sndbuf, sizeof(global.tune.client_sndbuf));

	if (global.tune.client_rcvbuf)
		setsockopt(cfd, SOL_SOCKET, SO_RCVBUF, &global.tune.client_rcvbuf, sizeof(global.tune.client_rcvbuf));

	/* OK, now either we have a pending handshake to execute with and then
	 * we must return to the I/O layer, or we can proceed with the end of
	 * the stream initialization. In case of handshake, we also set the I/O
	 * timeout to the frontend's client timeout and register a task in the
	 * session for this purpose. The connection's owner is left to the
	 * session during this period.
	 *
	 * At this point we set the relation between sess/task/conn this way :
	 *
	 *                   +----------------- task
	 *                   |                    |
	 *          orig -- sess <-- context      |
	 *           |       ^           |        |
	 *           v       |           |        |
	 *          conn -- owner ---> task <-----+
	 */
	if (cli_conn->flags & (CO_FL_HANDSHAKE | CO_FL_EARLY_SSL_HS)) {
		if (unlikely((sess->task = task_new(tid_bit)) == NULL))
			goto out_free_sess;

		/* conn_complete_session runs once the transport handshake
		 * is done; the task below kills embryonic sessions whose
		 * handshake never completes within the client timeout. */
		conn_set_xprt_done_cb(cli_conn, conn_complete_session);

		sess->task->context = sess;
		sess->task->nice = l->nice;
		sess->task->process = session_expire_embryonic;
		sess->task->expire = tick_add_ifset(now_ms, p->timeout.client);
		task_queue(sess->task);
		return 1;
	}

	/* OK let's complete stream initialization since there is no handshake */
	if (conn_complete_session(cli_conn) >= 0)
		return 1;

	/* error unrolling */
 out_free_sess:
	/* prevent call to listener_release during session_free. It will be
	 * done below, for all errors. */
	sess->listener = NULL;
	session_free(sess);
 out_free_conn:
	conn_stop_tracking(cli_conn);
	conn_xprt_close(cli_conn);
	conn_free(cli_conn);
 out_close:
	listener_release(l);
	if (ret < 0 && l->bind_conf->xprt == xprt_get(XPRT_RAW) && p->mode == PR_MODE_HTTP) {
		/* critical error, no more memory, try to emit a 500 response */
		struct chunk *err_msg = &p->errmsg[HTTP_ERR_500];
		if (!err_msg->str)
			err_msg = &http_err_chunks[HTTP_ERR_500];
		send(cfd, err_msg->str, err_msg->len, MSG_DONTWAIT|MSG_NOSIGNAL);
	}
	/* close the accepted fd: through fd_delete() if it was registered
	 * in fdtab, otherwise with a plain close(). */
	if (fdtab[cfd].owner)
		fd_delete(cfd);
	else
		close(cfd);
	return ret;
}
/**
 * rpcb_register - set or unset a port registration with the local rpcbind svc
 * @prog: RPC program number to bind
 * @vers: RPC version number to bind
 * @prot: transport protocol to use to make this request
 * @port: port value to register
 * @okay: result code
 *
 * port == 0 means unregister, port != 0 means register.
 *
 * This routine supports only rpcbind version 2.
 */
int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
{
	/* rpcbind is contacted over the IPv4 loopback interface */
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	struct rpcbind_args map = {
		.r_prog = prog,
		.r_vers = vers,
		.r_prot = prot,
		.r_port = port,
	};
	struct rpc_message msg = {
		/* port == 0 selects the UNSET procedure, otherwise SET */
		.rpc_proc = &rpcb_procedures2[port ? RPCBPROC_SET : RPCBPROC_UNSET],
		.rpc_argp = &map,
		.rpc_resp = okay,
	};
	struct rpc_clnt *rpcb_clnt;
	int error = 0;

	dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
			"rpcbind\n", (port ? "" : "un"),
			prog, vers, prot, port);

	/* NOTE(review): trailing rpcb_create() args are presumably the
	 * rpcbind version (2) and a privileged-port flag — confirm against
	 * rpcb_create's prototype. */
	rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
				sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
	if (IS_ERR(rpcb_clnt))
		return PTR_ERR(rpcb_clnt);

	error = rpc_call_sync(rpcb_clnt, &msg, 0);

	rpc_shutdown_client(rpcb_clnt);
	if (error < 0)
		printk(KERN_WARNING "RPC: failed to contact local rpcbind "
				"server (errno %d).\n", -error);
	dprintk("RPC: registration status %d/%d\n", error, *okay);

	return error;
}

/**
 * rpcb_getport_sync - obtain the port for an RPC service on a given host
 * @sin: address of remote peer
 * @prog: RPC program number to bind
 * @vers: RPC version number to bind
 * @prot: transport protocol to use to make this request
 *
 * Return value is the requested advertised port number,
 * or a negative errno value.
 *
 * Called from outside the RPC client in a synchronous task context.
 * Uses default timeout parameters specified by underlying transport.
 *
 * XXX: Needs to support IPv6
 */
int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
{
	struct rpcbind_args map = {
		.r_prog = prog,
		.r_vers = vers,
		.r_prot = prot,
		.r_port = 0,	/* filled in by the GETPORT reply */
	};
	struct rpc_message msg = {
		.rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
		.rpc_argp = &map,
		.rpc_resp = &map.r_port,
	};
	struct rpc_clnt *rpcb_clnt;
	int status;

	dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
		__FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);

	rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
				sizeof(*sin), prot, 2, 0);
	if (IS_ERR(rpcb_clnt))
		return PTR_ERR(rpcb_clnt);

	status = rpc_call_sync(rpcb_clnt, &msg, 0);

	rpc_shutdown_client(rpcb_clnt);
	if (status >= 0) {
		/* A port of zero means the service is not registered. */
		if (map.r_port != 0)
			return map.r_port;
		status = -EACCES;
	}
	return status;
}
EXPORT_SYMBOL_GPL(rpcb_getport_sync);

/* Launch an asynchronous GETPORT/GETADDR request against @rpcb_clnt for
 * the mapping described by @map.  @version indexes the rpcb procedure
 * table.  Returns the child rpc_task (or an ERR_PTR from rpc_run_task).
 *
 * NOTE(review): this always indexes rpcb_next_version[], even though the
 * caller may have probed rpcb_next_version6[] for AF_INET6 — verify the
 * two tables are meant to stay in step.
 */
static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
{
	struct rpc_message msg = {
		.rpc_proc = rpcb_next_version[version].rpc_proc,
		.rpc_argp = map,
		.rpc_resp = &map->r_port,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = rpcb_clnt,
		.rpc_message = &msg,
		.callback_ops = &rpcb_getport_ops,
		.callback_data = map,
		.flags = RPC_TASK_ASYNC,
	};

	return rpc_run_task(&task_setup_data);
}

/**
 * rpcb_getport_async - obtain the port for a given RPC service on a given host
 * @task: task that is waiting for portmapper request
 *
 * This one can be called for an ongoing RPC request, and can be used in
 * an async (rpciod) context.
*/ void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; u32 bind_version; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_clnt *rpcb_clnt; static struct rpcbind_args *map; struct rpc_task *child; struct sockaddr_storage addr; struct sockaddr *sap = (struct sockaddr *)&addr; size_t salen; int status; struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __FUNCTION__, clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __FUNCTION__); goto bailout_nowake; } /* Put self on queue before sending rpcbind request, in case * rpcb_getport_done completes before we return from rpc_run_task */ rpc_sleep_on(&xprt->binding, task, NULL, NULL); /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; dprintk("RPC: %5u %s: already bound\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ switch (sap->sa_family) { case AF_INET: info = rpcb_next_version; break; case AF_INET6: info = rpcb_next_version6; break; default: status = -EAFNOSUPPORT; dprintk("RPC: %5u %s: bad address family\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } if (info[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, bind_version, 0); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create 
failed, error %ld\n", task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); goto bailout_nofree; } map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; dprintk("RPC: %5u %s: no memory available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } map->r_prog = clnt->cl_prog; map->r_vers = clnt->cl_vers; map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __FUNCTION__); goto bailout; } rpc_put_task(child); task->tk_xprt->stat.bind_count++; return; bailout: kfree(map); xprt_put(xprt); bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); bailout_nowake: task->tk_status = status; } EXPORT_SYMBOL_GPL(rpcb_getport_async); /* * Rpcbind child task calls this callback via tk_exit. */ static void rpcb_getport_done(struct rpc_task *child, void *data) { struct rpcbind_args *map = data; struct rpc_xprt *xprt = map->r_xprt; int status = child->tk_status; /* Garbage reply: retry with a lesser rpcbind version */ if (status == -EIO) status = -EPROTONOSUPPORT; /* rpcbind server doesn't support this rpcbind protocol version */ if (status == -EPROTONOSUPPORT) xprt->bind_index++; if (status < 0) { /* rpcbind server not available on remote host? 
*/ xprt->ops->set_port(xprt, 0); } else if (map->r_port == 0) { /* Requested RPC service wasn't registered on remote host */ xprt->ops->set_port(xprt, 0); status = -EACCES; } else { /* Succeeded */ xprt->ops->set_port(xprt, map->r_port); xprt_set_bound(xprt); status = 0; } dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", child->tk_pid, status, map->r_port); rpcb_wake_rpcbind_waiters(xprt, status); } static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); *p++ = htonl(rpcb->r_prot); *p++ = htonl(rpcb->r_port); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); dprintk("RPC: rpcb_decode_getport result %u\n", *portp); return 0; } static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); dprintk("RPC: rpcb_decode_set result %u\n", *boolp); return 0; } static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); p = xdr_encode_string(p, rpcb->r_netid); p = xdr_encode_string(p, rpcb->r_addr); p = xdr_encode_string(p, rpcb->r_owner); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; }