/*
 * packet_release() - tear down an AF_PACKET socket (2.4-era, protinfo union).
 *
 * Unlinks the sock from the global packet_sklist, detaches the protocol
 * receive hook, flushes multicast/mmap state, orphans the socket and drops
 * the final reference.  Each __sock_put() here releases a reference that a
 * specific subsystem was holding (list membership, protocol hook).
 *
 * Returns 0 always; a NULL sock->sk means release was already done.
 */
static int packet_release(struct socket *sock)
{
    struct sock *sk = sock->sk;
    struct sock **skp;

    if (!sk)
        return 0;

    /* Unlink from the global list; the list held one reference. */
    write_lock_bh(&packet_sklist_lock);
    for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
        if (*skp == sk) {
            *skp = sk->next;
            __sock_put(sk);
            break;
        }
    }
    write_unlock_bh(&packet_sklist_lock);

    /*
     * Unhook packet receive handler.
     */
    if (sk->protinfo.af_packet->running) {
        /*
         * Remove the protocol hook
         */
        dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
        sk->protinfo.af_packet->running = 0;
        __sock_put(sk);  /* drop the reference the hook was holding */
    }

#ifdef CONFIG_PACKET_MULTICAST
    packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
    if (sk->protinfo.af_packet->pg_vec) {
        /* A zeroed tpacket_req asks packet_set_ring() to free the ring. */
        struct tpacket_req req;
        memset(&req, 0, sizeof(req));
        packet_set_ring(sk, &req, 1);
    }
#endif

    /*
     * Now the socket is dead. No more input will appear.
     */
    sock_orphan(sk);
    sock->sk = NULL;

    /* Purge queues */
    skb_queue_purge(&sk->receive_queue);

    sock_put(sk);
    return 0;
}
/*
 * Cancel the L2CAP socket timer.  If we actually deactivated a pending
 * timer, drop the socket reference that the timer was holding.
 */
static void l2cap_sock_clear_timer(struct sock *sk)
{
    BT_DBG("sock %p state %d", sk, sk->state);

    if (!timer_pending(&sk->timer))
        return;

    if (del_timer(&sk->timer))
        __sock_put(sk);
}
/*
 * netlink_release() - destroy a netlink socket (2.4-era, protinfo union).
 *
 * Unlinks from the protocol hash, finishes/destroys any in-progress dump
 * callback (which held its own socket reference), orphans the socket,
 * wakes sleepers and drops the final reference.  Returns 0 always.
 */
static int netlink_release(struct socket *sock)
{
    struct sock *sk = sock->sk;

    if (!sk)
        return 0;

    netlink_remove(sk);

    /* Abort an in-flight dump; cb held a socket reference of its own. */
    spin_lock(&sk->protinfo.af_netlink->cb_lock);
    if (sk->protinfo.af_netlink->cb) {
        sk->protinfo.af_netlink->cb->done(sk->protinfo.af_netlink->cb);
        netlink_destroy_callback(sk->protinfo.af_netlink->cb);
        sk->protinfo.af_netlink->cb = NULL;
        __sock_put(sk);
    }
    spin_unlock(&sk->protinfo.af_netlink->cb_lock);

    /* OK. Socket is unlinked, and, therefore, no new packets will arrive */

    sock_orphan(sk);
    sock->sk = NULL;
    wake_up_interruptible_all(&sk->protinfo.af_netlink->wait);

    skb_queue_purge(&sk->write_queue);

    sock_put(sk);
    return 0;
}
/*
 * sco_sock_clear_timer() - cancel the SCO socket timeout (eCos port).
 *
 * This port replaces the Linux sk->timer with an eCos alarm; disabling and
 * deleting the alarm is the equivalent of del_timer().
 *
 * NOTE(review): the upstream Linux version dropped a socket reference
 * (__sock_put) when it cancelled a pending timer.  Whether the eCos alarm
 * path takes an equivalent reference when armed should be confirmed against
 * the corresponding sco_sock_set_timer(); if it does, a reference is leaked
 * here.  (The previously commented-out dead code has been removed.)
 */
static void sco_sock_clear_timer(struct sock *sk)
{
    BT_DBG("sock %p state %d", sk, sk->state);

    cyg_alarm_disable(sk->timeout_alarm_handle);
    cyg_alarm_delete(sk->timeout_alarm_handle);
}
/*
 * tcp_clear_xmit_timers() - stop all TCP transmit-side timers for sk.
 *
 * Clears the retransmit, delayed-ACK and keepalive (sk->timer) timers.
 * Each timer, while armed, holds a socket reference; every successful
 * del_timer() therefore pairs with one __sock_put().
 */
void tcp_clear_xmit_timers(struct sock *sk)
{
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

    tp->pending = 0;
    if (timer_pending(&tp->retransmit_timer) &&
        del_timer(&tp->retransmit_timer))
        __sock_put(sk);

    tp->ack.pending = 0;
    tp->ack.blocked = 0;
    if (timer_pending(&tp->delack_timer) &&
        del_timer(&tp->delack_timer))
        __sock_put(sk);

    /* sk->timer is the keepalive/SYNACK timer in this era. */
    if (timer_pending(&sk->timer) && del_timer(&sk->timer))
        __sock_put(sk);
}
/*
 * packet_release() - tear down an AF_PACKET socket (2.6-era, packet_sock).
 *
 * Unlinks the sock from packet_sklist (sk_del_node_init drops the list's
 * reference itself), detaches the protocol hook if bound, flushes
 * multicast/mmap resources, orphans the socket and releases the final
 * reference.  Returns 0 always.
 */
static int packet_release(struct socket *sock)
{
    struct sock *sk = sock->sk;
    struct packet_sock *po;

    if (!sk)
        return 0;

    po = pkt_sk(sk);

    write_lock_bh(&packet_sklist_lock);
    sk_del_node_init(sk);
    write_unlock_bh(&packet_sklist_lock);

    /*
     * Unhook packet receive handler.
     */
    if (po->running) {
        /*
         * Remove the protocol hook
         */
        dev_remove_pack(&po->prot_hook);
        po->running = 0;
        po->num = 0;
        __sock_put(sk);  /* drop the reference held while hooked */
    }

#ifdef CONFIG_PACKET_MULTICAST
    packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
    if (po->pg_vec) {
        /* A zeroed request makes packet_set_ring() free the ring. */
        struct tpacket_req req;
        memset(&req, 0, sizeof(req));
        packet_set_ring(sk, &req, 1);
    }
#endif

    /*
     * Now the socket is dead. No more input will appear.
     */
    sock_orphan(sk);
    sock->sk = NULL;

    /* Purge queues */
    skb_queue_purge(&sk->sk_receive_queue);

    sock_put(sk);
    return 0;
}
/*
 * Unlink sk from the raw IPv6 hash chain, if hashed.  Drops the protocol
 * use count and the reference the hash chain was holding.
 */
static void raw_v6_unhash(struct sock *sk)
{
    write_lock_bh(&raw_v6_lock);
    if (sk->pprev != NULL) {
        struct sock *successor = sk->next;

        if (successor != NULL)
            successor->pprev = sk->pprev;
        *sk->pprev = successor;
        sk->pprev = NULL;
        sock_prot_dec_use(sk->prot);
        __sock_put(sk);
    }
    write_unlock_bh(&raw_v6_lock);
}
/*
 * Unlink sk from the UDP hash chain, if hashed.  Clears the bound port
 * number, drops the protocol use count, and releases the reference the
 * hash chain was holding.
 */
static void udp_v4_unhash(struct sock *sk)
{
    write_lock_bh(&udp_hash_lock);
    if (sk->pprev != NULL) {
        struct sock *successor = sk->next;

        if (successor != NULL)
            successor->pprev = sk->pprev;
        *sk->pprev = successor;
        sk->pprev = NULL;
        sk->num = 0;
        sock_prot_dec_use(sk->prot);
        __sock_put(sk);
    }
    write_unlock_bh(&udp_hash_lock);
}
static void netlink_remove(struct sock *sk) { struct sock **skp; netlink_table_grab(); for (skp = &nl_table[sk->protocol]; *skp; skp = &((*skp)->next)) { if (*skp == sk) { *skp = sk->next; __sock_put(sk); break; } } netlink_table_ungrab(); }
void bluez_sock_unlink(struct bluez_sock_list *l, struct sock *sk) { struct sock **skp; write_lock(&l->lock); for (skp = &l->head; *skp; skp = &((*skp)->next)) { if (*skp == sk) { *skp = sk->next; __sock_put(sk); break; } } write_unlock(&l->lock); }
/*
 * packet_do_bind() - (re)bind an AF_PACKET socket to a protocol/device.
 *
 * Detaches any existing protocol hook, then installs a new one unless
 * protocol is 0 (pure unbind) or the device is down.  The hook holds a
 * socket reference while installed (sock_hold on add, __sock_put on
 * removal).  Returns 0 always; a down device is reported asynchronously
 * via sk_err/sk_error_report rather than as a return value.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
    struct packet_sock *po = pkt_sk(sk);

    /*
     *	Detach an existing hook if present.
     */

    lock_sock(sk);

    spin_lock(&po->bind_lock);
    if (po->running) {
        __sock_put(sk);
        po->running = 0;
        po->num = 0;
        /* dev_remove_pack() can sleep/synchronize, so the spinlock is
         * dropped around it and re-taken afterwards. */
        spin_unlock(&po->bind_lock);
        dev_remove_pack(&po->prot_hook);
        spin_lock(&po->bind_lock);
    }

    po->num = protocol;
    po->prot_hook.type = protocol;
    po->prot_hook.dev = dev;

    po->ifindex = dev ? dev->ifindex : 0;

    if (protocol == 0)
        goto out_unlock;  /* protocol 0 means unbind only */

    if (dev) {
        if (dev->flags&IFF_UP) {
            dev_add_pack(&po->prot_hook);
            sock_hold(sk);
            po->running = 1;
        } else {
            /* Device exists but is down: signal the error instead
             * of installing the hook. */
            sk->sk_err = ENETDOWN;
            if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_error_report(sk);
        }
    } else {
        /* No device: listen on all interfaces. */
        dev_add_pack(&po->prot_hook);
        sock_hold(sk);
        po->running = 1;
    }

out_unlock:
    spin_unlock(&po->bind_lock);
    release_sock(sk);
    return 0;
}
static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk) { struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c; write_lock(&l->lock); if (sk == l->head) l->head = next; if (next) l2cap_pi(next)->prev_c = prev; if (prev) l2cap_pi(prev)->next_c = next; write_unlock(&l->lock); __sock_put(sk); }
/*
 * packet_do_bind() - (re)bind an AF_PACKET socket (2.4-era, protinfo union).
 *
 * Detaches any existing protocol hook, then installs a new one unless
 * protocol is 0 (unbind) or the device is down.  The hook holds a socket
 * reference while installed.  Returns 0 always; a down device is reported
 * via sk->err / error_report.
 *
 * NOTE(review): dev_remove_pack() is called here while bind_lock (a
 * spinlock) is held; the later packet_sock version of this function drops
 * the lock around that call.  Whether this ordering is safe depends on this
 * kernel version's dev_remove_pack() — confirm before backporting changes.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
    /*
     *	Detach an existing hook if present.
     */

    lock_sock(sk);

    spin_lock(&sk->protinfo.af_packet->bind_lock);
    if (sk->protinfo.af_packet->running) {
        dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
        __sock_put(sk);  /* drop the reference the hook held */
        sk->protinfo.af_packet->running = 0;
    }

    sk->num = protocol;
    sk->protinfo.af_packet->prot_hook.type = protocol;
    sk->protinfo.af_packet->prot_hook.dev = dev;

    sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;

    if (protocol == 0)
        goto out_unlock;  /* protocol 0 means unbind only */

    if (dev) {
        if (dev->flags&IFF_UP) {
            dev_add_pack(&sk->protinfo.af_packet->prot_hook);
            sock_hold(sk);
            sk->protinfo.af_packet->running = 1;
        } else {
            /* Device is down: report asynchronously. */
            sk->err = ENETDOWN;
            if (!sk->dead)
                sk->error_report(sk);
        }
    } else {
        /* No device: listen on all interfaces. */
        dev_add_pack(&sk->protinfo.af_packet->prot_hook);
        sock_hold(sk);
        sk->protinfo.af_packet->running = 1;
    }

out_unlock:
    spin_unlock(&sk->protinfo.af_packet->bind_lock);
    release_sock(sk);
    return 0;
}
/*
 * Unbind sk from every Phonet resource slot it occupies.  One reference
 * was held per bound resource; each cleared slot is paired with one
 * __sock_put() after the mutex is dropped.
 */
void pn_sock_unbind_all_res(struct sock *sk)
{
    unsigned res;
    unsigned cleared = 0;

    mutex_lock(&resource_mutex);
    for (res = 0; res < 256; res++) {
        if (pnres.sk[res] != sk)
            continue;
        RCU_INIT_POINTER(pnres.sk[res], NULL);
        cleared++;
    }
    mutex_unlock(&resource_mutex);

    for (; cleared > 0; cleared--)
        __sock_put(sk);

    /* Caller is responsible for RCU sync before final sock_put() */
}
/*
 * pep_sock_accept() - accept a pending Phonet pipe connection (ackq variant).
 *
 * Waits for a connection request, takes the first sock off the listener's
 * ack queue, and completes or rejects its connection using the queued
 * request skb.  On success the new sock is moved from the accept queue to
 * the listener's pipe hash list and returned; on failure NULL is returned
 * and *errp carries the error.
 *
 * NOTE(review): no explicit emptiness check on pn->ackq before __sk_head();
 * this presumably relies on pep_wait_connreq() guaranteeing a queued sock —
 * confirm against that helper.
 */
static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
{
    struct pep_sock *pn = pep_sk(sk);
    struct sock *newsk = NULL;
    struct sk_buff *oskb;
    int err;

    lock_sock(sk);
    err = pep_wait_connreq(sk, flags & O_NONBLOCK);
    if (err)
        goto out;

    newsk = __sk_head(&pn->ackq);

    oskb = skb_dequeue(&newsk->sk_receive_queue);
    err = pep_accept_conn(newsk, oskb);
    if (err) {
        /* Put the request back so a later accept() can retry it. */
        skb_queue_head(&newsk->sk_receive_queue, oskb);
        newsk = NULL;
        goto out;
    }
    kfree_skb(oskb);

    sock_hold(sk);
    pep_sk(newsk)->listener = sk;

    sock_hold(newsk);
    /* Move from the ack queue to the established-pipe list. */
    sk_del_node_init(newsk);
    sk_acceptq_removed(sk);
    sk_add_node(newsk, &pn->hlist);
    __sock_put(newsk);

out:
    release_sock(sk);
    *errp = err;
    return newsk;
}
/*
 * netlink_release() - destroy a netlink socket (2.4-era, with URELEASE
 * notification).
 *
 * Unlinks from the protocol table, aborts any in-progress dump callback
 * (which held its own socket reference), orphans the socket, and — for
 * sockets that had a unicast pid but no multicast groups — notifies
 * NETLINK_URELEASE listeners before dropping the final reference.
 */
static int netlink_release(struct socket *sock)
{
    struct sock *sk = sock->sk;

    if (!sk)
        return 0;

    netlink_remove(sk);

    /* Abort an in-flight dump; cb held a socket reference of its own. */
    spin_lock(&sk->protinfo.af_netlink->cb_lock);
    if (sk->protinfo.af_netlink->cb) {
        sk->protinfo.af_netlink->cb->done(sk->protinfo.af_netlink->cb);
        netlink_destroy_callback(sk->protinfo.af_netlink->cb);
        sk->protinfo.af_netlink->cb = NULL;
        __sock_put(sk);
    }
    spin_unlock(&sk->protinfo.af_netlink->cb_lock);

    /* OK. Socket is unlinked, and, therefore, no new packets will arrive */

    sock_orphan(sk);
    sock->sk = NULL;
    wake_up_interruptible_all(&sk->protinfo.af_netlink->wait);

    skb_queue_purge(&sk->write_queue);

    /* Only pid-bound, non-multicast sockets generate URELEASE events. */
    if (sk->protinfo.af_netlink->pid && !sk->protinfo.af_netlink->groups) {
        struct netlink_notify n = { protocol:sk->protocol,
                                    pid:sk->protinfo.af_netlink->pid };
        notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
    }

    sock_put(sk);
    return 0;
}
/*
 * netlink_release() - destroy a netlink socket (2.6-era, netlink_opt).
 *
 * Same teardown sequence as the older variants: unlink from the pid hash,
 * abort any in-progress dump (which held its own socket reference), orphan
 * the socket, wake sleepers, purge the write queue, emit NETLINK_URELEASE
 * for pid-bound non-multicast sockets, and drop the final reference.
 */
static int netlink_release(struct socket *sock)
{
    struct sock *sk = sock->sk;
    struct netlink_opt *nlk;

    if (!sk)
        return 0;

    netlink_remove(sk);
    nlk = nlk_sk(sk);

    /* Abort an in-flight dump; cb held a socket reference of its own. */
    spin_lock(&nlk->cb_lock);
    if (nlk->cb) {
        nlk->cb->done(nlk->cb);
        netlink_destroy_callback(nlk->cb);
        nlk->cb = NULL;
        __sock_put(sk);
    }
    spin_unlock(&nlk->cb_lock);

    /* OK. Socket is unlinked, and, therefore, no new packets will arrive */

    sock_orphan(sk);
    sock->sk = NULL;
    wake_up_interruptible_all(&nlk->wait);

    skb_queue_purge(&sk->sk_write_queue);

    if (nlk->pid && !nlk->groups) {
        struct netlink_notify n = {
            .protocol = sk->sk_protocol,
            .pid = nlk->pid,
        };
        notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
    }

    sock_put(sk);
    return 0;
}

/*
 * netlink_autobind() - pick a unique pid for an unbound socket.
 *
 * First tries the current process pid; on collision it walks negative pid
 * values via the static `rover`, restarting the lookup each time.  Retries
 * insertion until a free pid is found.  Always returns 0.
 */
static int netlink_autobind(struct socket *sock)
{
    struct sock *sk = sock->sk;
    struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
    struct hlist_head *head;
    struct sock *osk;
    struct hlist_node *node;
    s32 pid = current->pid;
    int err;
    static s32 rover = -4097;

retry:
    cond_resched();
    netlink_table_grab();
    head = nl_pid_hashfn(hash, pid);
    sk_for_each(osk, node, head) {
        if (nlk_sk(osk)->pid == pid) {
            /* Bind collision, search negative pid values. */
            pid = rover--;
            /* Reset the rover if it wrapped into positive space. */
            if (rover > -4097)
                rover = -4097;
            netlink_table_ungrab();
            goto retry;
        }
    }
    netlink_table_ungrab();

    err = netlink_insert(sk, pid);
    if (err == -EADDRINUSE)
        goto retry;
    nlk_sk(sk)->groups = 0;
    return 0;
}

/*
 * netlink_capable() - check whether this socket may perform the operation
 * indicated by flag (per-protocol non-root permission, or CAP_NET_ADMIN).
 */
static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
    return (nl_nonroot[sock->sk->sk_protocol] & flag) ||
           capable(CAP_NET_ADMIN);
}

/*
 * netlink_bind() - bind a netlink socket to a pid and multicast groups.
 *
 * Binding a pid is one-shot: rebinding to a different pid fails with
 * -EINVAL.  Multicast group membership requires NL_NONROOT_RECV capability
 * and is tracked on the per-protocol mc_list.
 */
static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
    struct sock *sk = sock->sk;
    struct netlink_opt *nlk = nlk_sk(sk);
    struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
    int err;

    if (nladdr->nl_family != AF_NETLINK)
        return -EINVAL;

    /* Only superuser is allowed to listen multicasts */
    if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_RECV))
        return -EPERM;

    if (nlk->pid) {
        if (nladdr->nl_pid != nlk->pid)
            return -EINVAL;
    } else {
        err = nladdr->nl_pid ?
            netlink_insert(sk, nladdr->nl_pid) :
            netlink_autobind(sock);
        if (err)
            return err;
    }

    if (!nladdr->nl_groups && !nlk->groups)
        return 0;

    netlink_table_grab();
    /* Join or leave the multicast list depending on the transition. */
    if (nlk->groups && !nladdr->nl_groups)
        __sk_del_bind_node(sk);
    else if (!nlk->groups && nladdr->nl_groups)
        sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
    nlk->groups = nladdr->nl_groups;
    netlink_table_ungrab();

    return 0;
}

/*
 * netlink_connect() - set (or clear, for AF_UNSPEC) the default
 * destination pid/groups of the socket.  Autobinds if not yet bound.
 */
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
    int err = 0;
    struct sock *sk = sock->sk;
    struct netlink_opt *nlk = nlk_sk(sk);
    struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;

    if (addr->sa_family == AF_UNSPEC) {
        /* Disconnect: clear the default destination. */
        sk->sk_state = NETLINK_UNCONNECTED;
        nlk->dst_pid = 0;
        nlk->dst_groups = 0;
        return 0;
    }
    if (addr->sa_family != AF_NETLINK)
        return -EINVAL;

    /* Only superuser is allowed to send multicasts */
    if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
        return -EPERM;

    if (!nlk->pid)
        err = netlink_autobind(sock);

    if (err == 0) {
        sk->sk_state = NETLINK_CONNECTED;
        nlk->dst_pid = nladdr->nl_pid;
        nlk->dst_groups = nladdr->nl_groups;
    }

    return err;
}
/*
 * ip6_push_pending_frames() - coalesce the socket's corked write queue
 * into one skb (with a frag_list), prepend the IPv6 header and transmit
 * via ip6_local_out().
 *
 * Each queued skb's socket reference is transferred to the head skb: the
 * per-fragment __sock_put() drops the reference its destructor would have
 * dropped, since the destructor is cleared right after.
 *
 * Returns 0 or a negative errno; send errors are suppressed unless the
 * socket requested IPV6_RECVERR.  Cork state is released on all paths.
 */
int ip6_push_pending_frames(struct sock *sk)
{
    struct sk_buff *skb, *tmp_skb;
    struct sk_buff **tail_skb;
    struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
    struct inet_sock *inet = inet_sk(sk);
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct ipv6hdr *hdr;
    struct ipv6_txoptions *opt = np->cork.opt;
    struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
    struct flowi *fl = &inet->cork.fl;
    unsigned char proto = fl->proto;
    int err = 0;

    if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
        goto out;
    tail_skb = &(skb_shinfo(skb)->frag_list);

    /* move skb->data to ip header from ext header */
    if (skb->data < skb_network_header(skb))
        __skb_pull(skb, skb_network_offset(skb));
    while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
        __skb_pull(tmp_skb, skb_network_header_len(skb));
        *tail_skb = tmp_skb;
        tail_skb = &(tmp_skb->next);
        skb->len += tmp_skb->len;
        skb->data_len += tmp_skb->len;
        skb->truesize += tmp_skb->truesize;
        /* Drop the ref the fragment's destructor would have dropped. */
        __sock_put(tmp_skb->sk);
        tmp_skb->destructor = NULL;
        tmp_skb->sk = NULL;
    }

    /* Allow local fragmentation. */
    if (np->pmtudisc < IPV6_PMTUDISC_DO)
        skb->local_df = 1;

    ipv6_addr_copy(final_dst, &fl->fl6_dst);
    __skb_pull(skb, skb_network_header_len(skb));
    if (opt && opt->opt_flen)
        ipv6_push_frag_opts(skb, opt, &proto);
    if (opt && opt->opt_nflen)
        /* Routing headers may rewrite the final destination. */
        ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

    skb_push(skb, sizeof(struct ipv6hdr));
    skb_reset_network_header(skb);
    hdr = ipv6_hdr(skb);

    /* Version(6) | traffic class | flow label, in one 32-bit word. */
    *(__be32*)hdr = fl->fl6_flowlabel |
            htonl(0x60000000 | ((int)np->cork.tclass << 20));

    hdr->hop_limit = np->cork.hop_limit;
    hdr->nexthdr = proto;
    ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
    ipv6_addr_copy(&hdr->daddr, final_dst);

    skb->priority = sk->sk_priority;
    skb->mark = sk->sk_mark;

    skb->dst = dst_clone(&rt->u.dst);
    IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
    if (proto == IPPROTO_ICMPV6) {
        struct inet6_dev *idev = ip6_dst_idev(skb->dst);
        ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
    }

    /* NOTE(review): payload_len is presumably filled in downstream by
     * ip6_local_out — confirm in this tree's __ip6_local_out. */
    err = ip6_local_out(skb);
    if (err) {
        if (err > 0)
            err = np->recverr ? net_xmit_errno(err) : 0;
        if (err)
            goto error;
    }

out:
    ip6_cork_release(inet, np);
    return err;
error:
    goto out;
}
/*
 * ip6_push_pending_frames() - older variant: coalesce the corked write
 * queue into one skb, build the IPv6 header inline (including payload_len
 * and jumbogram handling) and send through the NF_IP6_LOCAL_OUT hook.
 *
 * As in the newer variant, each queued fragment's socket reference is
 * dropped here (__sock_put) because its destructor is cleared.  Cork state
 * (options, cached route, flow, ALLFRAG) is released inline on all paths.
 */
int ip6_push_pending_frames(struct sock *sk)
{
    struct sk_buff *skb, *tmp_skb;
    struct sk_buff **tail_skb;
    struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
    struct inet_sock *inet = inet_sk(sk);
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct ipv6hdr *hdr;
    struct ipv6_txoptions *opt = np->cork.opt;
    struct rt6_info *rt = np->cork.rt;
    struct flowi *fl = &inet->cork.fl;
    unsigned char proto = fl->proto;
    int err = 0;

    if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
        goto out;
    tail_skb = &(skb_shinfo(skb)->frag_list);

    /* move skb->data to ip header from ext header */
    if (skb->data < skb->nh.raw)
        __skb_pull(skb, skb->nh.raw - skb->data);
    while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
        __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
        *tail_skb = tmp_skb;
        tail_skb = &(tmp_skb->next);
        skb->len += tmp_skb->len;
        skb->data_len += tmp_skb->len;
        skb->truesize += tmp_skb->truesize;
        /* Drop the ref the fragment's destructor would have dropped. */
        __sock_put(tmp_skb->sk);
        tmp_skb->destructor = NULL;
        tmp_skb->sk = NULL;
    }

    ipv6_addr_copy(final_dst, &fl->fl6_dst);
    __skb_pull(skb, skb->h.raw - skb->nh.raw);
    if (opt && opt->opt_flen)
        ipv6_push_frag_opts(skb, opt, &proto);
    if (opt && opt->opt_nflen)
        /* Routing headers may rewrite the final destination. */
        ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

    skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));

    /* Version(6) | flow label in one 32-bit word. */
    *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);

    /* payload_len of 0 signals a jumbogram (length > IPV6_MAXPLEN). */
    if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
        hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
    else
        hdr->payload_len = 0;
    hdr->hop_limit = np->cork.hop_limit;
    hdr->nexthdr = proto;
    ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
    ipv6_addr_copy(&hdr->daddr, final_dst);

    skb->dst = dst_clone(&rt->u.dst);
    IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
    err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
                  dst_output);
    if (err) {
        if (err > 0)
            err = np->recverr ? net_xmit_errno(err) : 0;
        if (err)
            goto error;
    }

out:
    /* Release cork state: options, cached route, flow info. */
    inet->cork.flags &= ~IPCORK_OPT;
    if (np->cork.opt) {
        kfree(np->cork.opt);
        np->cork.opt = NULL;
    }
    if (np->cork.rt) {
        dst_release(&np->cork.rt->u.dst);
        np->cork.rt = NULL;
        inet->cork.flags &= ~IPCORK_ALLFRAG;
    }
    memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
    return err;
error:
    goto out;
}
static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) { struct pep_sock *pn = pep_sk(sk), *newpn; struct sock *newsk = NULL; struct sk_buff *skb; struct pnpipehdr *hdr; struct sockaddr_pn dst, src; int err; u16 peer_type; u8 pipe_handle, enabled, n_sb; u8 aligned = 0; skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp); if (!skb) return NULL; lock_sock(sk); if (sk->sk_state != TCP_LISTEN) { err = -EINVAL; goto drop; } sk_acceptq_removed(sk); err = -EPROTO; if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) goto drop; hdr = pnp_hdr(skb); pipe_handle = hdr->pipe_handle; switch (hdr->state_after_connect) { case PN_PIPE_DISABLE: enabled = 0; break; case PN_PIPE_ENABLE: enabled = 1; break; default: pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM, GFP_KERNEL); goto drop; } peer_type = hdr->other_pep_type << 8; /* Parse sub-blocks (options) */ n_sb = hdr->data[4]; while (n_sb > 0) { u8 type, buf[1], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); if (data == NULL) goto drop; switch (type) { case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: if (len < 1) goto drop; peer_type = (peer_type & 0xff00) | data[0]; break; case PN_PIPE_SB_ALIGNED_DATA: aligned = data[0] != 0; break; } n_sb--; } /* Check for duplicate pipe handle */ newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle); if (unlikely(newsk)) { __sock_put(newsk); newsk = NULL; pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL); goto drop; } /* Create a new to-be-accepted sock */ newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; goto drop; } sock_init_data(NULL, newsk); newsk->sk_state = TCP_SYN_RECV; newsk->sk_backlog_rcv = pipe_do_rcv; newsk->sk_protocol = sk->sk_protocol; newsk->sk_destruct = pipe_destruct; newpn = pep_sk(newsk); pn_skb_get_dst_sockaddr(skb, &dst); pn_skb_get_src_sockaddr(skb, &src); newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); 
newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst); sock_hold(sk); newpn->listener = sk; skb_queue_head_init(&newpn->ctrlreq_queue); newpn->pipe_handle = pipe_handle; atomic_set(&newpn->tx_credits, 0); newpn->ifindex = 0; newpn->peer_type = peer_type; newpn->rx_credits = 0; newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; newpn->aligned = aligned; err = pep_accept_conn(newsk, skb); if (err) { sock_put(newsk); newsk = NULL; goto drop; } sk_add_node(newsk, &pn->hlist); drop: release_sock(sk); kfree_skb(skb); *errp = err; return newsk; }
/*
 * sdp_init_qp() - create the RX/TX completion rings and the RC queue pair
 * for an SDP socket on the given RDMA connection id.
 *
 * Returns 0 on success or a negative errno; on failure any ring created so
 * far is torn down via the goto cleanup chain.
 */
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
    struct ib_qp_init_attr qp_init_attr = {
        .event_handler = sdp_qp_event_handler,
        .cap.max_send_wr = SDP_TX_SIZE,
        .cap.max_recv_wr = sdp_rx_size,
        .cap.max_inline_data = sdp_inline_thresh,
        .sq_sig_type = IB_SIGNAL_REQ_WR,
        .qp_type = IB_QPT_RC,
    };
    struct ib_device *device = id->device;
    int rc;

    sdp_dbg(sk, "%s\n", __func__);

    sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
    sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

    /* Clamp SGE counts to both the device limit and SDP's own maxima. */
    qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES);
    sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge);

    qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES);
    sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge);

    sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
    if (!sdp_sk(sk)->sdp_dev) {
        sdp_warn(sk, "SDP not available on device %s\n", device->name);
        rc = -ENODEV;
        goto err_rx;
    }

    rc = sdp_rx_ring_create(sdp_sk(sk), device);
    if (rc)
        goto err_rx;

    rc = sdp_tx_ring_create(sdp_sk(sk), device);
    if (rc)
        goto err_tx;

    qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
    qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

    rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
    if (rc) {
        sdp_warn(sk, "Unable to create QP: %d.\n", rc);
        goto err_qp;
    }
    sdp_sk(sk)->qp = id->qp;
    sdp_sk(sk)->ib_device = device;
    sdp_sk(sk)->qp_active = 1;
    sdp_sk(sk)->context.device = device;
    sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

    sdp_dbg(sk, "%s done\n", __func__);
    return 0;

err_qp:
    sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
    sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
    return rc;
}

/*
 * sdp_get_max_send_frags() - number of send fragments needed for buf_size
 * bytes, capped at SDP_MAX_SEND_SGES - 1.
 */
static int sdp_get_max_send_frags(u32 buf_size)
{
    return MIN(
        /* +1 to conpensate on not aligned buffers */
        (PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
        SDP_MAX_SEND_SGES - 1);
}

/*
 * sdp_connect_handler() - handle an incoming CONNECT_REQUEST: validate the
 * SDP Hello header, clone the listening socket into a child, set up its
 * addresses (IPv4, v6-mapped or IPv6), create its QP, and queue it on the
 * listener's backlog.
 *
 * Returns 0 on success; on QP failure the half-built child is freed and
 * the error is returned so the caller can rdma_reject().
 */
static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
        struct rdma_cm_event *event)
{
    struct sockaddr_in *dst_addr;
    struct sock *child;
    const struct sdp_hh *h;
    int rc = 0;

    sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

    h = event->param.conn.private_data;
    SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

    if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
        sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
                h->ipv_cap & HH_IPV_MASK);
        return -EINVAL;
    }

    if (!h->max_adverts)
        return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
    child = sk_clone(sk, GFP_KERNEL);
#else
    child = sk_clone_lock(sk, GFP_KERNEL);
#endif
    if (!child)
        return -ENOMEM;

    sdp_init_sock(child);

    dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
    sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
    if (inet6_sk(sk)) {
        struct ipv6_pinfo *newnp;

        newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);

        memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));
        if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
            /* V6 mapped */
            sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
            ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF),
                    h->src_addr.ip4.addr);
            ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF),
                    h->dst_addr.ip4.addr);
            ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
        } else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
            struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr;
            struct sockaddr_in6 *src_addr6 =
                (struct sockaddr_in6 *)&id->route.addr.src_addr;

            ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
            ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
            ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
        } else {
            sdp_warn(child, "Bad IPV field: 0x%x\n",
                    h->ipv_cap & HH_IPV_MASK);
        }

        sdp_inet_daddr(child) = sdp_inet_saddr(child) =
            sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
    } else
#endif
    {
        sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
    }

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    /* Drop the clone reference (SDP's ref-tagged variant of sock_put). */
    __sock_put(child, SOCK_REF_CLONE);

    down_read(&device_removal_lock);

    rc = sdp_init_qp(child, id);
    if (rc) {
        bh_unlock_sock(child);
        up_read(&device_removal_lock);
        sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
        sdp_ssk_hist_close(child);
#endif
        sk_free(child);
        return rc;
    }

    sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
    atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);

    sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
    sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
        sizeof(struct sdp_bsdh);

    sdp_sk(child)->send_frags =
        sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);
    sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

    id->context = child;
    sdp_sk(child)->id = id;

    list_add_tail(&sdp_sk(child)->backlog_queue,
            &sdp_sk(sk)->backlog_queue);
    sdp_sk(child)->parent = sk;

    bh_unlock_sock(child);
    sdp_add_sock(sdp_sk(child));
    up_read(&device_removal_lock);

    sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

    /* child->sk_write_space(child); */
    /* child->sk_data_ready(child, 0); */
    sk->sk_data_ready(sk);

    return 0;
}

/*
 * sdp_response_handler() - handle CONNECT_RESPONSE (active side): move the
 * socket to ESTABLISHED, adopt the peer's advertised credits and receive
 * size from the HelloAck, and record the destination address.
 */
static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
        struct rdma_cm_event *event)
{
    const struct sdp_hah *h;
    struct sockaddr_in *dst_addr;

    sdp_dbg(sk, "%s\n", __func__);

    sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
    sdp_set_default_moderation(sdp_sk(sk));

    if (sock_flag(sk, SOCK_KEEPOPEN))
        sdp_start_keepalive_timer(sk);

    if (sock_flag(sk, SOCK_DEAD))
        return 0;

    h = event->param.conn.private_data;
    SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
    sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
    atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
    sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
    sdp_sk(sk)->xmit_size_goal =
        ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
    sdp_sk(sk)->send_frags =
        sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
    sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
        sdp_sk(sk)->send_frags * PAGE_SIZE);

    sdp_sk(sk)->poll_cq = 1;

    sk->sk_state_change(sk);
    sk_wake_async(sk, 0, POLL_OUT);

    dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
    sdp_inet_dport(sk) = dst_addr->sin_port;
    sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    return 0;
}

/*
 * sdp_connected_handler() - passive-side connection established: move the
 * child to ESTABLISHED and migrate it from the parent's backlog queue to
 * its accept queue, waking the parent.
 */
static int sdp_connected_handler(struct sock *sk)
{
    struct sock *parent;

    sdp_dbg(sk, "%s\n", __func__);

    parent = sdp_sk(sk)->parent;
    BUG_ON(!parent);

    sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    sdp_set_default_moderation(sdp_sk(sk));

    if (sock_flag(sk, SOCK_KEEPOPEN))
        sdp_start_keepalive_timer(sk);

    if (sock_flag(sk, SOCK_DEAD))
        return 0;

    lock_sock(parent);
    if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
        sdp_dbg(sk, "parent is going away.\n");
        goto done;
    }

    sk_acceptq_added(parent);
    sdp_dbg(parent, "%s child connection established\n", __func__);
    list_del_init(&sdp_sk(sk)->backlog_queue);
    list_add_tail(&sdp_sk(sk)->accept_queue,
            &sdp_sk(parent)->accept_queue);

    parent->sk_state_change(parent);
    sk_wake_async(parent, 0, POLL_OUT);
done:
    release_sock(parent);

    return 0;
}

/*
 * sdp_disconnected_handler() - flush pending sends; a SYN_RECV socket with
 * received data is promoted to connected first.  Returns -ECONNRESET when
 * the connection is actually gone.
 */
static int sdp_disconnected_handler(struct sock *sk)
{
    struct sdp_sock *ssk = sdp_sk(sk);

    sdp_dbg(sk, "%s\n", __func__);

    if (ssk->tx_ring.cq)
        if (sdp_xmit_poll(ssk, 1))
            sdp_post_sends(ssk, 0);

    if (sk->sk_state == TCP_SYN_RECV) {
        sdp_connected_handler(sk);

        if (rcv_nxt(ssk))
            return 0;
    }

    return -ECONNRESET;
}

/*
 * sdp_cma_handler() - central RDMA CM event dispatcher for SDP sockets.
 *
 * Runs the SDP connection state machine: address/route resolution and
 * active connect, passive CONNECT_REQUEST handling (Hello/HelloAck
 * exchange), establishment, disconnect and error paths.  On a failure that
 * still owns the cm_id, the socket is reset and — if it was a backlogged
 * child — released after its parent is unlinked from it.
 *
 * Returns 0 or a negative errno to the RDMA CM core.
 */
int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
    struct rdma_conn_param conn_param;
    struct sock *parent = NULL;
    struct sock *child = NULL;
    struct sock *sk;
    struct sdp_hah hah;
    struct sdp_hh hh;

    int rc = 0, rc2;

    sk = id->context;
    if (!sk) {
        sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
            rdma_cm_event_str(event->event));
        return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
            -EINVAL : 0;
    }

    sdp_add_to_history(sk, rdma_cm_event_str(event->event));
    lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
    sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
    if (!sdp_sk(sk)->id) {
        sdp_dbg(sk, "socket is being torn down\n");
        rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
            -EINVAL : 0;
        release_sock(sk);
        return rc;
    }

    switch (event->event) {
    case RDMA_CM_EVENT_ADDR_RESOLVED:
        /* Optionally restrict SDP to true InfiniBand links. */
        if (sdp_link_layer_ib_only &&
            rdma_node_get_transport(id->device->node_type) ==
                RDMA_TRANSPORT_IB &&
            rdma_port_get_link_layer(id->device, id->port_num) !=
                IB_LINK_LAYER_INFINIBAND) {
            sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
                "is allowed\n",
                rdma_port_get_link_layer(id->device, id->port_num));
            rc = -ENETUNREACH;
            break;
        }

        rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
        break;
    case RDMA_CM_EVENT_ADDR_ERROR:
        rc = -ENETUNREACH;
        break;
    case RDMA_CM_EVENT_ROUTE_RESOLVED:
        /* Active side: build the QP and send the SDP Hello. */
        rc = sdp_init_qp(sk, id);
        if (rc)
            break;
        memset(&hh, 0, sizeof hh);
        hh.bsdh.mid = SDP_MID_HELLO;
        hh.bsdh.len = htonl(sizeof(struct sdp_hh));
        hh.max_adverts = 1;
        hh.majv_minv = SDP_MAJV_MINV;
        sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
        hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
        atomic_set(&sdp_sk(sk)->remote_credits,
                rx_ring_posted(sdp_sk(sk)));
        hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
                PAGE_SIZE + sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        if (inet6_sk(sk)) {
            struct sockaddr *src_addr = (struct sockaddr *)
                &id->route.addr.src_addr;
            struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr;
            struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr;

            if (src_addr->sa_family == AF_INET) {
                /* IPv4 over IPv6 */
                ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, htonl(0xFFFF),
                        addr4->sin_addr.s_addr);
            } else {
                sk->sk_v6_rcv_saddr = addr6->sin6_addr;
            }
            inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
        } else
#endif
        {
            sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
                ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
        }
        memset(&conn_param, 0, sizeof conn_param);
        conn_param.private_data_len = sizeof hh;
        conn_param.private_data = &hh;
        conn_param.responder_resources = 4 /* TODO */;
        conn_param.initiator_depth = 4 /* TODO */;
        conn_param.retry_count = sdp_retry_count;
        SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

        if (sdp_apm_enable) {
            rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
            if (rc)
                sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc);
        }

        rc = rdma_connect(id, &conn_param);
        break;

    case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
        sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
                id->route.path_rec[1].slid, id->route.path_rec[1].dlid);
        break;

    case RDMA_CM_EVENT_ALT_PATH_LOADED:
        sdp_dbg(sk, "alt route path loaded\n");
        break;

    case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
        sdp_warn(sk, "alt route resolve error\n");
        break;

    case RDMA_CM_EVENT_ROUTE_ERROR:
        rc = -ETIMEDOUT;
        break;
    case RDMA_CM_EVENT_CONNECT_REQUEST:
        /* Passive side: create the child and answer with HelloAck. */
        rc = sdp_connect_handler(sk, id, event);
        if (rc) {
            sdp_dbg(sk, "Destroying qp\n");
            rdma_reject(id, NULL, 0);
            break;
        }
        child = id->context;
        atomic_set(&sdp_sk(child)->remote_credits,
                rx_ring_posted(sdp_sk(child)));
        memset(&hah, 0, sizeof hah);
        hah.bsdh.mid = SDP_MID_HELLO_ACK;
        hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
        hah.bsdh.len = htonl(sizeof(struct sdp_hah));
        hah.majv_minv = SDP_MAJV_MINV;
        hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
                                    but just in case */
        hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
            sizeof(struct sdp_bsdh));
        memset(&conn_param, 0, sizeof conn_param);
        conn_param.private_data_len = sizeof hah;
        conn_param.private_data = &hah;
        conn_param.responder_resources = 4 /* TODO */;
        conn_param.initiator_depth = 4 /* TODO */;
        conn_param.retry_count = sdp_retry_count;
        SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
        rc = rdma_accept(id, &conn_param);
        if (rc) {
            sdp_sk(child)->id = NULL;
            id->qp = NULL;
            id->context = NULL;
            parent = sdp_sk(child)->parent; /* TODO: hold ? */
        } else if (sdp_apm_enable) {
            rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
            if (rc2)
                sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
        }
        break;
    case RDMA_CM_EVENT_CONNECT_RESPONSE:
        rc = sdp_response_handler(sk, id, event);
        if (rc) {
            sdp_dbg(sk, "Destroying qp\n");
            rdma_reject(id, NULL, 0);
        } else {
            rc = rdma_accept(id, NULL);
            if (!rc && sdp_apm_enable) {
                rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
                if (rc2)
                    sdp_warn(sk, "APM couldn't be enabled for passive side:%d \n", rc2);
            }
        }
        break;
    case RDMA_CM_EVENT_CONNECT_ERROR:
        rc = -ETIMEDOUT;
        break;
    case RDMA_CM_EVENT_UNREACHABLE:
        rc = -ENETUNREACH;
        break;
    case RDMA_CM_EVENT_REJECTED:
        rc = -ECONNREFUSED;
        break;
    case RDMA_CM_EVENT_ESTABLISHED:
        sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
            ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
        rc = sdp_connected_handler(sk);
        break;
    case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
        if (sk->sk_state == TCP_LAST_ACK) {
            sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
            sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
            sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
                __func__);
        }
        sdp_sk(sk)->qp_active = 0;
        rdma_disconnect(id);
        if (sk->sk_state != TCP_TIME_WAIT) {
            if (sk->sk_state == TCP_CLOSE_WAIT) {
                sdp_dbg(sk, "IB teardown while in "
                    "TCP_CLOSE_WAIT taking reference to "
                    "let close() finish the work\n");
                sock_hold(sk, SOCK_REF_CMA);
                sdp_start_cma_timewait_timeout(sdp_sk(sk),
                        SDP_CMA_TIMEWAIT_TIMEOUT);
            }
            sdp_set_error(sk, -EPIPE);
            rc = sdp_disconnected_handler(sk);
        }
        break;
    case RDMA_CM_EVENT_TIMEWAIT_EXIT:
        rc = sdp_disconnected_handler(sk);
        break;
    case RDMA_CM_EVENT_DEVICE_REMOVAL:
        rc = -ENETRESET;
        break;
    case RDMA_CM_EVENT_ADDR_CHANGE:
        sdp_dbg(sk, "Got Address change event\n");
        rc = 0;
        break;
    default:
        printk(KERN_ERR "SDP: Unexpected CMA event: %d\n", event->event);
        rc = -ECONNABORTED;
        break;
    }

    sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

    /* On failure while we still own the cm_id, reset this socket and
     * remember its parent so the child can be unlinked below. */
    if (rc && sdp_sk(sk)->id == id) {
        child = sk;
        sdp_sk(sk)->id = NULL;
        id->qp = NULL;
        id->context = NULL;
        parent = sdp_sk(sk)->parent;
        sdp_reset_sk(sk, rc);
    }

    release_sock(sk);

    sdp_dbg(sk, "event: %s done. status %d\n",
            rdma_cm_event_str(event->event), rc);

    if (parent) {
        lock_sock(parent);
        if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
            sdp_dbg(sk, "parent is going away.\n");
            child = NULL;
            goto done;
        }
        /* Only release the child if it was still on the backlog. */
        if (!list_empty(&sdp_sk(child)->backlog_queue))
            list_del_init(&sdp_sk(child)->backlog_queue);
        else
            child = NULL;
done:
        release_sock(parent);
        if (child)
            sdp_common_release(child);
    }
    return rc;
}