//smallboy: We changed the whole function here.
int ip_local_out(struct sk_buff *skb)
{
    //us_nic_port *port = NULL;
    //port = get_local_out_port(skb);
    //if (port == NULL) {
    //    IP_INC_STATS_BH(skb->pnet, IPSTATS_MIB_OUTNOROUTES);
    //    return US_ENETUNREACH;
    //}
    s32 ret = US_RET_OK;
    u32 n;
    struct rte_mbuf *mbuf = (struct rte_mbuf *)(skb->head);
    struct net *pnet = skb->pnet;

    if (skb->nohdr) {
        if (skb->nf_trace || skb->used > 0) {
            rte_pktmbuf_prepend(mbuf, skb->mac_len);
        }
        mbuf_rebuild(skb, pnet->port);

        //ret = ip_send_out(pnet, mbuf, skb);   //smallboy: unknown error here; IP truncated? ???
        recv_pkt_dump(&mbuf, 1);

        n = rte_eth_tx_burst(pnet->port_id, pnet->send_queue_id, &mbuf, 1);
        if (n < 1) {
            US_ERR("TH:%u ,tx_burst failed on skb_id:%u sk_id:%u \n",
                   US_GET_LCORE(), skb->skb_id, skb->sk->sk_id);
            IP_ADD_STATS(skb->pnet, IPSTATS_MIB_OUTDISCARDS, 1);
            ret = US_ENETDOWN;
        } else {
            ret = US_RET_OK;
            IP_ADD_STATS(skb->pnet, IPSTATS_MIB_OUTPKTS, 1);
            //skb->gso_segs == 1;
        }

        if (skb->users == 1) {    //smallboy: pay attention to users, clone, used and nf_trace;
            __kfree_skb(skb, US_MBUF_FREE_BY_OTHER);    //users == 1; not cloned; or errors here;
        } else {
            if (ret == US_RET_OK)    //smallboy: if the send failed, keep the user reference;
                skb->users--;
            skb->used++;
            skb_reset_data_header(skb);
        }
    } else {
        if (skb_can_gso(skb)) {
            ret = ip_send_out_batch(skb, pnet);
        }

        if (skb->users == 1) {    //smallboy: pay attention to users, clone, used and nf_trace;
            __kfree_skb(skb, US_MBUF_FREE_BY_STACK);    //users == 1; not cloned; or errors here;
        } else {
            if (ret == US_RET_OK)
                skb->users--;
            skb->used++;
            skb_reset_data_header(skb);
        }
    }

    return ret;
}
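The branch above treats a single rte_eth_tx_burst() return of 0 as a hard failure and counts the packet as discarded. DPDK's rte_eth_tx_burst() may legitimately accept fewer packets than offered when the TX ring is full, so a common pattern is a small retry wrapper. The sketch below is illustrative only; the helper name and retry bound are assumptions, not part of this stack:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Illustrative sketch: retry a TX burst a few times before giving up.
 * Mbufs the NIC never accepted remain owned by the caller, which must
 * free them (e.g. rte_pktmbuf_free()) or account for them as drops. */
static inline uint16_t tx_burst_with_retry(uint16_t port_id, uint16_t queue_id,
                                           struct rte_mbuf **pkts, uint16_t n)
{
    uint16_t sent = 0;
    int retries = 3;    /* arbitrary bound for the sketch */

    while (sent < n && retries-- > 0)
        sent += rte_eth_tx_burst(port_id, queue_id, pkts + sent, n - sent);

    return sent;    /* number of mbufs actually handed to the NIC */
}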
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
                             struct net_device *dev)
{
    int status;
    struct netpoll_info *npinfo;

    if (!np || !dev || !netif_running(dev)) {
        __kfree_skb(skb);
        return;
    }

    npinfo = dev->npinfo;

    /* avoid recursion */
    if (npinfo->poll_owner == smp_processor_id() ||
        np->dev->xmit_lock_owner == smp_processor_id()) {
        if (np->drop)
            np->drop(skb);
        else
            __kfree_skb(skb);
        return;
    }

    do {
        npinfo->tries--;
        netif_tx_lock(dev);

        /*
         * network drivers do not expect to be called if the queue is
         * stopped.
         */
        status = NETDEV_TX_BUSY;
        if (!netif_queue_stopped(dev)) {
            dev->priv_flags |= IFF_IN_NETPOLL;
            status = dev->hard_start_xmit(skb, dev);
            dev->priv_flags &= ~IFF_IN_NETPOLL;
        }

        netif_tx_unlock(dev);

        /* success */
        if (!status) {
            npinfo->tries = MAX_RETRIES;  /* reset */
            return;
        }

        /* transmit busy */
        netpoll_poll_dev(dev);
        udelay(50);
    } while (npinfo->tries > 0);
}
static void zap_completion_queue(void)
{
    unsigned long flags;
    struct softnet_data *sd = &get_cpu_var(softnet_data);

    if (sd->completion_queue) {
        struct sk_buff *clist;

        local_irq_save(flags);
        clist = sd->completion_queue;
        sd->completion_queue = NULL;
        local_irq_restore(flags);

        while (clist != NULL) {
            struct sk_buff *skb = clist;
            clist = clist->next;
            if (skb->destructor)
                dev_kfree_skb_any(skb);    /* put this one back */
            else
                __kfree_skb(skb);
        }
    }

    put_cpu_var(softnet_data);
}
static void queue_process(struct work_struct *work)
{
    struct netpoll_info *npinfo =
        container_of(work, struct netpoll_info, tx_work.work);
    struct sk_buff *skb;
    unsigned long flags;

    while ((skb = skb_dequeue(&npinfo->txq))) {
        struct net_device *dev = skb->dev;

        if (!netif_device_present(dev) || !netif_running(dev)) {
            __kfree_skb(skb);
            continue;
        }

        local_irq_save(flags);
        netif_tx_lock(dev);
        if (netif_queue_stopped(dev) ||
            dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
            skb_queue_head(&npinfo->txq, skb);
            netif_tx_unlock(dev);
            local_irq_restore(flags);

            schedule_delayed_work(&npinfo->tx_work, HZ/10);
            return;
        }
        netif_tx_unlock(dev);
        local_irq_restore(flags);
    }
}
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
                         const struct dccp_hdr *dh, const unsigned len)
{
    struct dccp_sock *dp = dccp_sk(sk);

    if (dccp_check_seqno(sk, skb))
        goto discard;

    if (dccp_parse_options(sk, skb))
        goto discard;

    if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
        dccp_event_ack_recv(sk, skb);

    if (dccp_msk(sk)->dccpms_send_ack_vector &&
        dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
                        DCCP_SKB_CB(skb)->dccpd_seq,
                        DCCP_ACKVEC_STATE_RECEIVED))
        goto discard;

    ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
    ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

    return __dccp_rcv_established(sk, skb, dh, len);
discard:
    __kfree_skb(skb);
    return 0;
}
int emuswitch_process(struct net_bridge_throttle_state *state, struct sk_buff *skb)
{
    static int maxqlen = 0;
    //emuswitch_thunk_t thunk = EMUSWITCH_SKB_CB(skb)->thunk;
    //emuswitch_thunkstate_t thunkState = EMUSWITCH_SKB_CB(skb)->thunkstate;

    maxqlen = max(maxqlen, state->maxQueueLen);

    if (state->queue.qlen < state->maxQueueLen) {
        int oldQlen = state->queue.qlen;
        __skb_queue_tail(&state->queue, skb);
#if 0   // microoptimization
        if (oldQlen == 0) {
            restartTimer(state);
        }
#endif
        //printk("queue length %d\n", state->queue.qlen);
    } else {
        // Drop packet
        printk("dropped packet\n");
        __kfree_skb(skb);
    }

    if (state->queue.qlen > 200) {
        printk("queue length: %d/%d\n", state->queue.qlen, state->maxQueueLen);
    }

    return 1;
}
/**
 *  kfree_skb - free an sk_buff
 *  @skb: buffer to free
 *
 *  Drop a reference to the buffer and free it if the usage count has
 *  hit zero.
 */
void kfree_skb(struct sk_buff *skb)
{
    if (unlikely(!skb))
        return;
    if (likely(atomic_read(&skb->users) == 1))
        smp_rmb();
    else if (likely(!atomic_dec_and_test(&skb->users)))
        return;
    __kfree_skb(skb);
}
/**
 *  consume_skb - free an skbuff
 *  @skb: buffer to free
 *
 *  Drop a ref to the buffer and free it if the usage count has hit zero.
 *  Functions identically to kfree_skb, but kfree_skb assumes that the frame
 *  is being dropped after a failure and notes that.
 */
void consume_skb(struct sk_buff *skb)
{
    if (unlikely(!skb))
        return;
    else if (likely(!atomic_dec_and_test(&skb->users)))
        return;
    __kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
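The two kerneldoc blocks above carry the whole contract: both helpers drop one reference and free the skb once the count reaches zero, and the only difference is intent, kfree_skb() for frames dropped on error (which drop-monitoring hooks count) versus consume_skb() for frames that were processed successfully. A minimal illustration of honouring that split in a completion path; the function and its arguments are hypothetical, not taken from any real driver:

#include <linux/skbuff.h>

/* Illustrative only: free an skb after transmit while preserving the
 * drop/consume distinction described above, so drop-monitoring tools
 * do not count successful transmits as packet loss. */
static void example_tx_done(struct sk_buff *skb, bool tx_ok)
{
    if (tx_ok)
        consume_skb(skb);    /* frame left the box: not an error */
    else
        kfree_skb(skb);      /* frame was dropped: report it as such */
}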
int dccp_disconnect(struct sock *sk, int flags)
{
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct inet_sock *inet = inet_sk(sk);
    struct dccp_sock *dp = dccp_sk(sk);
    int err = 0;
    const int old_state = sk->sk_state;

    if (old_state != DCCP_CLOSED)
        dccp_set_state(sk, DCCP_CLOSED);

    /*
     * This corresponds to the ABORT function of RFC793, sec. 3.8
     * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
     */
    if (old_state == DCCP_LISTEN) {
        inet_csk_listen_stop(sk);
    } else if (dccp_need_reset(old_state)) {
        dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
        sk->sk_err = ECONNRESET;
    } else if (old_state == DCCP_REQUESTING)
        sk->sk_err = ECONNRESET;

    dccp_clear_xmit_timers(sk);
    ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
    dp->dccps_hc_rx_ccid = NULL;

    __skb_queue_purge(&sk->sk_receive_queue);
    __skb_queue_purge(&sk->sk_write_queue);
    if (sk->sk_send_head != NULL) {
        __kfree_skb(sk->sk_send_head);
        sk->sk_send_head = NULL;
    }

    inet->inet_dport = 0;

    if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
        inet_reset_saddr(sk);

    sk->sk_shutdown = 0;
    sock_reset_flag(sk, SOCK_DONE);
    icsk->icsk_backoff = 0;
    inet_csk_delack_init(sk);
    __sk_dst_reset(sk);

    WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

    sk->sk_error_report(sk);
    return err;
}
static int tfw_bmb_conn_recv(void *cdata, struct sk_buff *skb, unsigned int off)
{
    if (verbose) {
        unsigned int data_off = 0;

        TFW_LOG("Server response:\n------------------------------\n");
        ss_skb_process(skb, &data_off, tfw_bmb_print_msg, NULL);
        printk(KERN_INFO "\n------------------------------\n");
    }

    __kfree_skb(skb);
    return TFW_PASS;
}
void cxgb3i_conn_cleanup_task(struct iscsi_task *task)
{
    struct cxgb3i_task_data *tdata = task->dd_data +
                                     sizeof(struct iscsi_tcp_task);

    /* never reached the xmit task callout */
    if (tdata->skb)
        __kfree_skb(tdata->skb);
    memset(tdata, 0, sizeof(struct cxgb3i_task_data));

    /* MNC - Do we need a check in case this is called but
     * cxgb3i_conn_alloc_pdu has never been called on the task */
    cxgb3i_release_itt(task, task->hdr_itt);
    iscsi_tcp_cleanup_task(task);
}
int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb, struct l2t_entry *e)
{
again:
    switch (e->state) {
    case L2T_STATE_STALE:    /* entry is stale, kick off revalidation */
        neigh_event_send(e->neigh, NULL);
        spin_lock_bh(&e->lock);
        if (e->state == L2T_STATE_STALE)
            e->state = L2T_STATE_VALID;
        spin_unlock_bh(&e->lock);
    case L2T_STATE_VALID:    /* fast-path, send the packet on */
        return cxgb3_ofld_send(dev, skb);
    case L2T_STATE_RESOLVING:
        spin_lock_bh(&e->lock);
        if (e->state != L2T_STATE_RESOLVING) {    /* ARP already completed */
            spin_unlock_bh(&e->lock);
            goto again;
        }
        arpq_enqueue(e, skb);
        spin_unlock_bh(&e->lock);

        /*
         * Only the first packet added to the arpq should kick off
         * resolution.  However, because the alloc_skb below can fail,
         * we allow each packet added to the arpq to retry resolution
         * as a way of recovering from transient memory exhaustion.
         * A better way would be to use a work request to retry L2T
         * entries when there's no memory.
         */
        if (!neigh_event_send(e->neigh, NULL)) {
            skb = alloc_skb(sizeof(struct cpl_l2t_write_req),
                            GFP_ATOMIC);
            if (!skb)
                break;

            spin_lock_bh(&e->lock);
            if (e->arpq_head)
                setup_l2e_send_pending(dev, skb, e);
            else    /* we lost the race */
                __kfree_skb(skb);
            spin_unlock_bh(&e->lock);
        }
    }
    return 0;
}
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
    bool slow;

    if (likely(atomic_read(&skb->users) == 1))
        smp_rmb();
    else if (likely(!atomic_dec_and_test(&skb->users)))
        return;

    slow = lock_sock_fast(sk);
    skb_orphan(skb);
    sk_mem_reclaim_partial(sk);
    unlock_sock_fast(sk, slow);

    /* skb is now orphaned, can be freed outside of locked section */
    __kfree_skb(skb);
}
int dccp_disconnect(struct sock *sk, int flags)
{
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct inet_sock *inet = inet_sk(sk);
    int err = 0;
    const int old_state = sk->sk_state;

    if (old_state != DCCP_CLOSED)
        dccp_set_state(sk, DCCP_CLOSED);

    if (old_state == DCCP_LISTEN) {
        inet_csk_listen_stop(sk);
    } else if (dccp_need_reset(old_state)) {
        dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
        sk->sk_err = ECONNRESET;
    } else if (old_state == DCCP_REQUESTING)
        sk->sk_err = ECONNRESET;

    dccp_clear_xmit_timers(sk);
    __skb_queue_purge(&sk->sk_receive_queue);
    __skb_queue_purge(&sk->sk_write_queue);
    if (sk->sk_send_head != NULL) {
        __kfree_skb(sk->sk_send_head);
        sk->sk_send_head = NULL;
    }

    inet->dport = 0;

    if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
        inet_reset_saddr(sk);

    sk->sk_shutdown = 0;
    sock_reset_flag(sk, SOCK_DONE);
    icsk->icsk_backoff = 0;
    inet_csk_delack_init(sk);
    __sk_dst_reset(sk);

    WARN_ON(inet->num && !icsk->icsk_bind_hash);

    sk->sk_error_report(sk);
    return err;
}
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
    bool slow;

    if (likely(atomic_read(&skb->users) == 1))
        smp_rmb();
    else if (likely(!atomic_dec_and_test(&skb->users)))
        return;

    slow = lock_sock_fast(sk);
    skb_orphan(skb);
    sk_mem_reclaim_partial(sk);
    unlock_sock_fast(sk, slow);

    trace_kfree_skb(skb, skb_free_datagram_locked);
    __kfree_skb(skb);
}
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
    bool slow;

    if (!skb_unref(skb)) {
        sk_peek_offset_bwd(sk, len);
        return;
    }

    slow = lock_sock_fast(sk);
    sk_peek_offset_bwd(sk, len);
    skb_orphan(skb);
    sk_mem_reclaim_partial(sk);
    unlock_sock_fast(sk, slow);

    /* skb is now orphaned, can be freed outside of locked section */
    __kfree_skb(skb);
}
static inline void disable_nb_throttle_state(struct net_bridge_throttle_state *state)
{
    if (state->useThrottle) {
        state->useThrottle = 0;
        BUG_TRAP(state->maxQueueLen > 0);
        BUG_TRAP(state->bytesPerSecond > 0);
        state->bytesSentInInterval = 0;
        state->timeout = 0;
        del_timer(&state->queue_timer);
        while (state->queue.qlen > 0) {
            struct sk_buff *skb = __skb_dequeue(&state->queue);
            __kfree_skb(skb);
        }
    }
}
static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
    int status = NETDEV_TX_BUSY;
    unsigned long tries;
    struct net_device *dev = np->dev;
    struct netpoll_info *npinfo = np->dev->npinfo;

    if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
        __kfree_skb(skb);
        return;
    }

    /* don't get messages out of order, and no recursion */
    if (skb_queue_len(&npinfo->txq) == 0 &&
        npinfo->poll_owner != smp_processor_id()) {
        unsigned long flags;

        local_irq_save(flags);
        /* try until next clock tick */
        for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
             tries > 0; --tries) {
            if (netif_tx_trylock(dev)) {
                if (!netif_queue_stopped(dev))
                    status = dev->hard_start_xmit(skb, dev);
                netif_tx_unlock(dev);

                if (status == NETDEV_TX_OK)
                    break;
            }

            /* tickle device maybe there is some cleanup */
            netpoll_poll(np);

            udelay(USEC_PER_POLL);
        }
        local_irq_restore(flags);
    }

    if (status != NETDEV_TX_OK) {
        skb_queue_tail(&npinfo->txq, skb);
        schedule_delayed_work(&npinfo->tx_work, 0);
    }
}
void netpoll_queue(struct sk_buff *skb)
{
    unsigned long flags;

    if (queue_depth == MAX_QUEUE_DEPTH) {
        __kfree_skb(skb);
        return;
    }

    spin_lock_irqsave(&queue_lock, flags);
    if (!queue_head)
        queue_head = skb;
    else
        queue_tail->next = skb;
    queue_tail = skb;
    queue_depth++;
    spin_unlock_irqrestore(&queue_lock, flags);

    schedule_work(&send_queue);
}
int dccp_disconnect(struct sock *sk, int flags)
{
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct inet_sock *inet = inet_sk(sk);
    int err = 0;
    const int old_state = sk->sk_state;

    if (old_state != DCCP_CLOSED)
        dccp_set_state(sk, DCCP_CLOSED);

    /* ABORT function of RFC793 */
    if (old_state == DCCP_LISTEN) {
        inet_csk_listen_stop(sk);
        /* FIXME: do the active reset thing */
    } else if (old_state == DCCP_REQUESTING)
        sk->sk_err = ECONNRESET;

    dccp_clear_xmit_timers(sk);
    __skb_queue_purge(&sk->sk_receive_queue);
    if (sk->sk_send_head != NULL) {
        __kfree_skb(sk->sk_send_head);
        sk->sk_send_head = NULL;
    }

    inet->dport = 0;

    if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
        inet_reset_saddr(sk);

    sk->sk_shutdown = 0;
    sock_reset_flag(sk, SOCK_DONE);
    icsk->icsk_backoff = 0;
    inet_csk_delack_init(sk);
    __sk_dst_reset(sk);

    BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

    sk->sk_error_report(sk);
    return err;
}
void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn)
{
    struct sk_buff *skb;
    unsigned int read = 0;
    struct iscsi_conn *conn = c3cn->user_data;
    int err = 0;

    cxgb3i_rx_debug("cn 0x%p.\n", c3cn);

    read_lock(&c3cn->callback_lock);
    if (unlikely(!conn || conn->suspend_rx)) {
        cxgb3i_rx_debug("conn 0x%p, id %d, suspend_rx %lu!\n",
                        conn, conn ? conn->id : 0xFF,
                        conn ? conn->suspend_rx : 0xFF);
        read_unlock(&c3cn->callback_lock);
        return;
    }

    skb = skb_peek(&c3cn->receive_queue);
    while (!err && skb) {
        __skb_unlink(skb, &c3cn->receive_queue);
        read += skb_rx_pdulen(skb);
        cxgb3i_rx_debug("conn 0x%p, cn 0x%p, rx skb 0x%p, pdulen %u.\n",
                        conn, c3cn, skb, skb_rx_pdulen(skb));
        err = cxgb3i_conn_read_pdu_skb(conn, skb);
        __kfree_skb(skb);
        skb = skb_peek(&c3cn->receive_queue);
    }
    read_unlock(&c3cn->callback_lock);

    if (c3cn) {
        c3cn->copied_seq += read;
        cxgb3i_c3cn_rx_credits(c3cn, read);
    }
    conn->rxdata_octets += read;

    if (err) {
        cxgb3i_log_info("conn 0x%p rx failed err %d.\n", conn, err);
        iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
    }
}
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
                         const struct dccp_hdr *dh, const unsigned len)
{
    struct dccp_sock *dp = dccp_sk(sk);

    if (dccp_check_seqno(sk, skb))
        goto discard;

    if (dccp_parse_options(sk, skb))
        goto discard;

    if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
        dccp_event_ack_recv(sk, skb);

    if (dccp_msk(sk)->dccpms_send_ack_vector &&
        dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
                        DCCP_SKB_CB(skb)->dccpd_seq,
                        DCCP_ACKVEC_STATE_RECEIVED))
        goto discard;

    /*
     * Deliver to the CCID module in charge.
     * FIXME: Currently DCCP operates one-directional only, i.e. a listening
     *        server is not at the same time a connecting client. There is
     *        not much sense in delivering to both rx/tx sides at the moment
     *        (only one is active at a time); when moving to bidirectional
     *        service, this needs to be revised.
     */
    if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
        ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
    else
        ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

    return __dccp_rcv_established(sk, skb, dh, len);
discard:
    __kfree_skb(skb);
    return 0;
}
/**
 * This is the main body of the socket close function in Sync Sockets.
 *
 * inet_release() can sleep (as well as tcp_close()), so we make our own
 * non-sleepable socket closing.
 *
 * This function must be used only for data sockets.
 * Use standard sock_release() for listening sockets.
 *
 * In most cases it is called in softirq context and from ksoftirqd which
 * processes data from the socket (RSS and RPS distribute packets that way).
 *
 * Note: it used to be called in process context as well, at the time when
 * Tempesta starts or stops. That's not the case right now, but it may change.
 *
 * TODO In some cases we need to close a socket aggressively, without the
 * FIN_WAIT_2 state, e.g. by sending RST. So we need to add a second parameter
 * to the function which says how to close the socket.
 * One of the examples is rcl_req_limit() (it should reset connections).
 * See tcp_sk(sk)->linger2 processing in standard tcp_close().
 *
 * Called with locked socket.
 */
static void ss_do_close(struct sock *sk)
{
    struct sk_buff *skb;
    int data_was_unread = 0;
    int state;

    if (unlikely(!sk))
        return;
    SS_DBG("Close socket %p (%s): cpu=%d account=%d refcnt=%d\n",
           sk, ss_statename[sk->sk_state], smp_processor_id(),
           sk_has_account(sk), atomic_read(&sk->sk_refcnt));
    assert_spin_locked(&sk->sk_lock.slock);
    ss_sock_cpu_check(sk);
    BUG_ON(sk->sk_state == TCP_LISTEN);
    /* We must return immediately, so LINGER option is meaningless. */
    WARN_ON(sock_flag(sk, SOCK_LINGER));
    /* We don't support virtual containers, so TCP_REPAIR is prohibited. */
    WARN_ON(tcp_sk(sk)->repair);
    /* The socket must have atomic allocation mask. */
    WARN_ON(!(sk->sk_allocation & GFP_ATOMIC));

    /* The below is mostly copy-paste from tcp_close(). */
    sk->sk_shutdown = SHUTDOWN_MASK;

    while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
        u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
                  tcp_hdr(skb)->fin;
        data_was_unread += len;
        SS_DBG("free rcv skb %p\n", skb);
        __kfree_skb(skb);
    }

    sk_mem_reclaim(sk);

    if (sk->sk_state == TCP_CLOSE)
        goto adjudge_to_death;

    if (data_was_unread) {
        NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
        tcp_set_state(sk, TCP_CLOSE);
        tcp_send_active_reset(sk, sk->sk_allocation);
    } else if (tcp_close_state(sk)) {
        /* The code below is taken from tcp_send_fin(). */
        struct tcp_sock *tp = tcp_sk(sk);
        int mss_now = tcp_current_mss(sk);

        skb = tcp_write_queue_tail(sk);

        if (tcp_send_head(sk) != NULL) {
            /* Send FIN with data if we have any. */
            TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
            TCP_SKB_CB(skb)->end_seq++;
            tp->write_seq++;
        } else {
            /* No data to send in the socket, allocate new skb. */
            skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
            if (!skb) {
                SS_WARN("can't send FIN due to bad alloc");
            } else {
                skb_reserve(skb, MAX_TCP_HEADER);
                tcp_init_nondata_skb(skb, tp->write_seq,
                                     TCPHDR_ACK | TCPHDR_FIN);
                tcp_queue_skb(sk, skb);
            }
        }
        __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
    }

adjudge_to_death:
    state = sk->sk_state;
    sock_hold(sk);
    sock_orphan(sk);

    /*
     * SS sockets are processed in softirq only,
     * so backlog queue should be empty.
     */
    WARN_ON(sk->sk_backlog.tail);

    percpu_counter_inc(sk->sk_prot->orphan_count);

    if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
        return;

    if (sk->sk_state == TCP_FIN_WAIT2) {
        const int tmo = tcp_fin_time(sk);
        if (tmo > TCP_TIMEWAIT_LEN) {
            inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
        } else {
            tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
            return;
        }
    }
    if (sk->sk_state != TCP_CLOSE) {
        sk_mem_reclaim(sk);
        if (tcp_check_oom(sk, 0)) {
            tcp_set_state(sk, TCP_CLOSE);
            tcp_send_active_reset(sk, GFP_ATOMIC);
            NET_INC_STATS_BH(sock_net(sk),
                             LINUX_MIB_TCPABORTONMEMORY);
        }
    }
    if (sk->sk_state == TCP_CLOSE) {
        struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
        if (req != NULL)
            reqsk_fastopen_remove(sk, req, false);
        inet_csk_destroy_sock(sk);
    }
}
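The header comment requires ss_do_close() to run with the socket spinlock held, in softirq context, and the function takes an extra reference via sock_hold() before orphaning the socket. A hypothetical caller could therefore look like the sketch below; the wrapper name and the final sock_put() pairing are assumptions modelled on how tcp_close() releases its own hold, not code from Tempesta:

/* Hypothetical wrapper: close a data socket from softirq context. */
static void ss_close_sketch(struct sock *sk)
{
    bh_lock_sock(sk);    /* satisfies assert_spin_locked() in ss_do_close() */
    ss_do_close(sk);
    bh_unlock_sock(sk);
    sock_put(sk);        /* assumed to balance the sock_hold() in ss_do_close() */
}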
void dccp_close(struct sock *sk, long timeout)
{
    struct dccp_sock *dp = dccp_sk(sk);
    struct sk_buff *skb;
    u32 data_was_unread = 0;
    int state;

    lock_sock(sk);

    sk->sk_shutdown = SHUTDOWN_MASK;

    if (sk->sk_state == DCCP_LISTEN) {
        dccp_set_state(sk, DCCP_CLOSED);

        /* Special case. */
        inet_csk_listen_stop(sk);

        goto adjudge_to_death;
    }

    sk_stop_timer(sk, &dp->dccps_xmit_timer);

    /*
     * We need to flush the recv. buffs.  We do this only on the
     * descriptor close, not protocol-sourced closes, because the
     * reader process may not have drained the data yet!
     */
    while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
        data_was_unread += skb->len;
        __kfree_skb(skb);
    }

    if (data_was_unread) {
        /* Unread data was tossed, send an appropriate Reset Code */
        DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
        dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
        dccp_set_state(sk, DCCP_CLOSED);
    } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
        /* Check zero linger _after_ checking for unread data. */
        sk->sk_prot->disconnect(sk, 0);
    } else if (sk->sk_state != DCCP_CLOSED) {
        /*
         * Normal connection termination. May need to wait if there are
         * still packets in the TX queue that are delayed by the CCID.
         */
        dccp_flush_write_queue(sk, &timeout);
        dccp_terminate_connection(sk);
    }

    /*
     * Flush write queue. This may be necessary in several cases:
     * - we have been closed by the peer but still have application data;
     * - abortive termination (unread data or zero linger time),
     * - normal termination but queue could not be flushed within time limit
     */
    __skb_queue_purge(&sk->sk_write_queue);

    sk_stream_wait_close(sk, timeout);

adjudge_to_death:
    state = sk->sk_state;
    sock_hold(sk);
    sock_orphan(sk);

    /*
     * It is the last release_sock in its life. It will remove backlog.
     */
    release_sock(sk);

    /*
     * Now socket is owned by kernel and we acquire BH lock
     * to finish close. No need to check for user refs.
     */
    local_bh_disable();
    bh_lock_sock(sk);
    WARN_ON(sock_owned_by_user(sk));

    percpu_counter_inc(sk->sk_prot->orphan_count);

    /* Have we already been destroyed by a softirq or backlog? */
    if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
        goto out;

    if (sk->sk_state == DCCP_CLOSED)
        inet_csk_destroy_sock(sk);

    /* Otherwise, socket is reprieved until protocol close. */

out:
    bh_unlock_sock(sk);
    local_bh_enable();
    sock_put(sk);
}
void dccp_close(struct sock *sk, long timeout)
{
    struct sk_buff *skb;

    lock_sock(sk);

    sk->sk_shutdown = SHUTDOWN_MASK;

    if (sk->sk_state == DCCP_LISTEN) {
        dccp_set_state(sk, DCCP_CLOSED);

        /* Special case. */
        inet_csk_listen_stop(sk);

        goto adjudge_to_death;
    }

    /*
     * We need to flush the recv. buffs.  We do this only on the
     * descriptor close, not protocol-sourced closes, because the
     * reader process may not have drained the data yet!
     */
    /* FIXME: check for unread data */
    while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
        __kfree_skb(skb);
    }

    if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
        /* Check zero linger _after_ checking for unread data. */
        sk->sk_prot->disconnect(sk, 0);
    } else if (dccp_close_state(sk)) {
        dccp_send_close(sk, 1);
    }

    sk_stream_wait_close(sk, timeout);

adjudge_to_death:
    /*
     * It is the last release_sock in its life. It will remove backlog.
     */
    release_sock(sk);

    /*
     * Now socket is owned by kernel and we acquire BH lock
     * to finish close. No need to check for user refs.
     */
    local_bh_disable();
    bh_lock_sock(sk);
    BUG_TRAP(!sock_owned_by_user(sk));

    sock_hold(sk);
    sock_orphan(sk);

    /*
     * The last release_sock may have processed the CLOSE or RESET
     * packet moving sock to CLOSED state, if not we have to fire
     * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
     * in draft-ietf-dccp-spec-11. -acme
     */
    if (sk->sk_state == DCCP_CLOSING) {
        /* FIXME: should start at 2 * RTT */
        /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                  inet_csk(sk)->icsk_rto,
                                  DCCP_RTO_MAX);
#if 0
        /* Yeah, we should use sk->sk_prot->orphan_count, etc */
        dccp_set_state(sk, DCCP_CLOSED);
#endif
    }

    atomic_inc(sk->sk_prot->orphan_count);
    if (sk->sk_state == DCCP_CLOSED)
        inet_csk_destroy_sock(sk);

    /* Otherwise, socket is reprieved until protocol close. */

    bh_unlock_sock(sk);
    local_bh_enable();
    sock_put(sk);
}
void dccp_close(struct sock *sk, long timeout)
{
    struct dccp_sock *dp = dccp_sk(sk);
    struct sk_buff *skb;
    u32 data_was_unread = 0;
    int state;

    lock_sock(sk);

    sk->sk_shutdown = SHUTDOWN_MASK;

    if (sk->sk_state == DCCP_LISTEN) {
        dccp_set_state(sk, DCCP_CLOSED);

        /* Special case. */
        inet_csk_listen_stop(sk);

        goto adjudge_to_death;
    }

    sk_stop_timer(sk, &dp->dccps_xmit_timer);

    /*
     * We need to flush the recv. buffs.  We do this only on the
     * descriptor close, not protocol-sourced closes, because the
     * reader process may not have drained the data yet!
     */
    while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
        data_was_unread += skb->len;
        __kfree_skb(skb);
    }

    if (data_was_unread) {
        /* Unread data was tossed, send an appropriate Reset Code */
        DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
        dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
        dccp_set_state(sk, DCCP_CLOSED);
    } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
        /* Check zero linger _after_ checking for unread data. */
        sk->sk_prot->disconnect(sk, 0);
    } else if (sk->sk_state != DCCP_CLOSED) {
        dccp_terminate_connection(sk);
    }

    sk_stream_wait_close(sk, timeout);

adjudge_to_death:
    state = sk->sk_state;
    sock_hold(sk);
    sock_orphan(sk);
    atomic_inc(sk->sk_prot->orphan_count);

    /*
     * It is the last release_sock in its life. It will remove backlog.
     */
    release_sock(sk);

    /*
     * Now socket is owned by kernel and we acquire BH lock
     * to finish close. No need to check for user refs.
     */
    local_bh_disable();
    bh_lock_sock(sk);
    BUG_TRAP(!sock_owned_by_user(sk));

    /* Have we already been destroyed by a softirq or backlog? */
    if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
        goto out;

    if (sk->sk_state == DCCP_CLOSED)
        inet_csk_destroy_sock(sk);

    /* Otherwise, socket is reprieved until protocol close. */

out:
    bh_unlock_sock(sk);
    local_bh_enable();
    sock_put(sk);
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct sk_buff *opt_skb = NULL;

    /* Imagine: socket is IPv6. IPv4 packet arrives,
       goes to IPv4 receive handler and backlogged.
       From backlog it always goes here. Kerboom...
       Fortunately, dccp_rcv_established and rcv_established
       handle them correctly, but it is not case with
       dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
     */
    if (skb->protocol == htons(ETH_P_IP))
        return dccp_v4_do_rcv(sk, skb);

    if (sk_filter(sk, skb))
        goto discard;

    /*
     * socket locking is here for SMP purposes as backlog rcv is currently
     * called with bh processing disabled.
     */

    /* Do Stevens' IPV6_PKTOPTIONS.

       Yes, guys, it is the only place in our code, where we
       may make it not affecting IPv4.
       The rest of code is protocol independent,
       and I do not like idea to uglify IPv4.

       Actually, all the idea behind IPV6_PKTOPTIONS
       looks not very well thought. For now we latch
       options, received in the last packet, enqueued
       by tcp. Feel free to propose better solution.
                                              --ANK (980728)
     */
    if (np->rxopt.all)
        /*
         * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
         *        (wrt ipv6_pktoptions) and net/ipv6/tcp_ipv6.c for an example.
         */
        opt_skb = skb_clone(skb, GFP_ATOMIC);

    if (sk->sk_state == DCCP_OPEN) { /* Fast path */
        if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
            goto reset;
        if (opt_skb) {
            /* XXX This is where we would goto ipv6_pktoptions. */
            __kfree_skb(opt_skb);
        }
        return 0;
    }

    /*
     *  Step 3: Process LISTEN state
     *     If S.state == LISTEN,
     *       If P.type == Request or P contains a valid Init Cookie option,
     *            (* Must scan the packet's options to check for Init
     *               Cookies.  Only Init Cookies are processed here,
     *               however; other options are processed in Step 8.  This
     *               scan need only be performed if the endpoint uses Init
     *               Cookies *)
     *            (* Generate a new socket and switch to that socket *)
     *            Set S := new socket for this port pair
     *            S.state = RESPOND
     *            Choose S.ISS (initial seqno) or set from Init Cookies
     *            Initialize S.GAR := S.ISS
     *            Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
     *            Continue with S.state == RESPOND
     *            (* A Response packet will be generated in Step 11 *)
     *       Otherwise,
     *            Generate Reset(No Connection) unless P.type == Reset
     *            Drop packet and return
     *
     * NOTE: the check for the packet types is done in
     *       dccp_rcv_state_process
     */
    if (sk->sk_state == DCCP_LISTEN) {
        struct sock *nsk = dccp_v6_hnd_req(sk, skb);

        if (nsk == NULL)
            goto discard;
        /*
         * Queue it on the new socket if the new socket is active,
         * otherwise we just shortcircuit this and continue with
         * the new socket..
         */
        if (nsk != sk) {
            if (dccp_child_process(sk, nsk, skb))
                goto reset;
            if (opt_skb != NULL)
                __kfree_skb(opt_skb);
            return 0;
        }
    }

    if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
        goto reset;
    if (opt_skb) {
        /* XXX This is where we would goto ipv6_pktoptions. */
        __kfree_skb(opt_skb);
    }
    return 0;

reset:
    dccp_v6_ctl_send_reset(sk, skb);
discard:
    if (opt_skb != NULL)
        __kfree_skb(opt_skb);
    kfree_skb(skb);
    return 0;
}
static int dccp_rcv_request_sent_state_process(struct sock *sk,
                                               struct sk_buff *skb,
                                               const struct dccp_hdr *dh,
                                               const unsigned len)
{
    /*
     *  Step 4: Prepare sequence numbers in REQUEST
     *     If S.state == REQUEST,
     *        If (P.type == Response or P.type == Reset)
     *              and S.AWL <= P.ackno <= S.AWH,
     *           / * Set sequence number variables corresponding to the
     *              other endpoint, so P will pass the tests in Step 6 * /
     *           Set S.GSR, S.ISR, S.SWL, S.SWH
     *           / * Response processing continues in Step 10; Reset
     *              processing continues in Step 9 * /
     */
    if (dh->dccph_type == DCCP_PKT_RESPONSE) {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct dccp_sock *dp = dccp_sk(sk);

        /* Stop the REQUEST timer */
        inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
        BUG_TRAP(sk->sk_send_head != NULL);
        __kfree_skb(sk->sk_send_head);
        sk->sk_send_head = NULL;

        if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
                       dp->dccps_awl, dp->dccps_awh)) {
            dccp_pr_debug("invalid ackno: S.AWL=%llu, "
                          "P.ackno=%llu, S.AWH=%llu \n",
                          (unsigned long long)dp->dccps_awl,
                          (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
                          (unsigned long long)dp->dccps_awh);
            goto out_invalid_packet;
        }

        if (dccp_parse_options(sk, skb))
            goto out_invalid_packet;

        if (dccp_msk(sk)->dccpms_send_ack_vector &&
            dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
                            DCCP_SKB_CB(skb)->dccpd_seq,
                            DCCP_ACKVEC_STATE_RECEIVED))
            goto out_invalid_packet; /* FIXME: change error code */

        dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
        dccp_update_gsr(sk, dp->dccps_isr);
        /*
         * SWL and AWL are initially adjusted so that they are not less than
         * the initial Sequence Numbers received and sent, respectively:
         *      SWL := max(GSR + 1 - floor(W/4), ISR),
         *      AWL := max(GSS - W' + 1, ISS).
         * These adjustments MUST be applied only at the beginning of the
         * connection.
         *
         * AWL was adjusted in dccp_v4_connect -acme
         */
        dccp_set_seqno(&dp->dccps_swl,
                       max48(dp->dccps_swl, dp->dccps_isr));

        dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);

        /*
         *    Step 10: Process REQUEST state (second part)
         *       If S.state == REQUEST,
         *          / * If we get here, P is a valid Response from the
         *              server (see Step 4), and we should move to
         *              PARTOPEN state. PARTOPEN means send an Ack,
         *              don't send Data packets, retransmit Acks
         *              periodically, and always include any Init Cookie
         *              from the Response * /
         *          S.state := PARTOPEN
         *          Set PARTOPEN timer
         *          Continue with S.state == PARTOPEN
         *          / * Step 12 will send the Ack completing the
         *              three-way handshake * /
         */
        dccp_set_state(sk, DCCP_PARTOPEN);

        /* Make sure socket is routed, for correct metrics. */
        icsk->icsk_af_ops->rebuild_header(sk);

        if (!sock_flag(sk, SOCK_DEAD)) {
            sk->sk_state_change(sk);
            sk_wake_async(sk, 0, POLL_OUT);
        }

        if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
            icsk->icsk_accept_queue.rskq_defer_accept) {
            /* Save one ACK. Data will be ready after
             * several ticks, if write_pending is set.
             *
             * It may be deleted, but with this feature tcpdumps
             * look so _wonderfully_ clever, that I was not able
             * to stand against the temptation 8)     --ANK
             */
            /*
             * OK, in DCCP we can as well do a similar trick, its
             * even in the draft, but there is no need for us to
             * schedule an ack here, as dccp_sendmsg does this for
             * us, also stated in the draft. -acme
             */
            __kfree_skb(skb);
            return 0;
        }
        dccp_send_ack(sk);
        return -1;
    }

out_invalid_packet:
    /* dccp_v4_do_rcv will send a reset */
    DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
    return 1;
}
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                           struct dccp_hdr *dh, unsigned len)
{
    struct dccp_sock *dp = dccp_sk(sk);
    struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
    const int old_state = sk->sk_state;
    int queued = 0;

    /*
     *  Step 3: Process LISTEN state
     *
     *     If S.state == LISTEN,
     *       If P.type == Request or P contains a valid Init Cookie option,
     *            (* Must scan the packet's options to check for Init
     *               Cookies.  Only Init Cookies are processed here,
     *               however; other options are processed in Step 8.  This
     *               scan need only be performed if the endpoint uses Init
     *               Cookies *)
     *            (* Generate a new socket and switch to that socket *)
     *            Set S := new socket for this port pair
     *            S.state = RESPOND
     *            Choose S.ISS (initial seqno) or set from Init Cookies
     *            Initialize S.GAR := S.ISS
     *            Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
     *            Cookies Continue with S.state == RESPOND
     *            (* A Response packet will be generated in Step 11 *)
     *       Otherwise,
     *            Generate Reset(No Connection) unless P.type == Reset
     *            Drop packet and return
     */
    if (sk->sk_state == DCCP_LISTEN) {
        if (dh->dccph_type == DCCP_PKT_REQUEST) {
            if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0)
                return 1;

            /* FIXME: do congestion control initialization */
            goto discard;
        }
        if (dh->dccph_type == DCCP_PKT_RESET)
            goto discard;

        /* Caller (dccp_v4_do_rcv) will send Reset */
        dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
        return 1;
    }

    if (sk->sk_state != DCCP_REQUESTING) {
        if (dccp_check_seqno(sk, skb))
            goto discard;

        /*
         * Step 8: Process options and mark acknowledgeable
         */
        if (dccp_parse_options(sk, skb))
            goto discard;

        if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
            dccp_event_ack_recv(sk, skb);

        if (dccp_msk(sk)->dccpms_send_ack_vector &&
            dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
                            DCCP_SKB_CB(skb)->dccpd_seq,
                            DCCP_ACKVEC_STATE_RECEIVED))
            goto discard;

        /* XXX see the comments in dccp_rcv_established about this */
        if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
            ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
        else
            ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
    }

    /*
     *  Step 9: Process Reset
     *     If P.type == Reset,
     *        Tear down connection
     *        S.state := TIMEWAIT
     *        Set TIMEWAIT timer
     *        Drop packet and return
     */
    if (dh->dccph_type == DCCP_PKT_RESET) {
        /*
         * Queue the equivalent of TCP fin so that dccp_recvmsg
         * exits the loop
         */
        dccp_fin(sk, skb);
        dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
        return 0;
    /*
     *   Step 7: Check for unexpected packet types
     *      If (S.is_server and P.type == CloseReq)
     *          or (S.is_server and P.type == Response)
     *          or (S.is_client and P.type == Request)
     *          or (S.state == RESPOND and P.type == Data),
     *        Send Sync packet acknowledging P.seqno
     *        Drop packet and return
     */
    } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
                (dh->dccph_type == DCCP_PKT_RESPONSE ||
                 dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
               (dp->dccps_role == DCCP_ROLE_CLIENT &&
                dh->dccph_type == DCCP_PKT_REQUEST) ||
               (sk->sk_state == DCCP_RESPOND &&
                dh->dccph_type == DCCP_PKT_DATA)) {
        dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
        goto discard;
    } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
        dccp_rcv_closereq(sk, skb);
        goto discard;
    } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
        dccp_rcv_close(sk, skb);
        return 0;
    }

    if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
        dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
        goto discard;
    }

    switch (sk->sk_state) {
    case DCCP_CLOSED:
        dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
        return 1;

    case DCCP_REQUESTING:
        /* FIXME: do congestion control initialization */
        queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
        if (queued >= 0)
            return queued;

        __kfree_skb(skb);
        return 0;

    case DCCP_RESPOND:
    case DCCP_PARTOPEN:
        queued = dccp_rcv_respond_partopen_state_process(sk, skb,
                                                         dh, len);
        break;
    }

    if (dh->dccph_type == DCCP_PKT_ACK ||
        dh->dccph_type == DCCP_PKT_DATAACK) {
        switch (old_state) {
        case DCCP_PARTOPEN:
            sk->sk_state_change(sk);
            sk_wake_async(sk, 0, POLL_OUT);
            break;
        }
    }

    if (!queued) {
discard:
        __kfree_skb(skb);
    }
    return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct sk_buff *opt_skb = NULL;

    /* Imagine: socket is IPv6. IPv4 packet arrives,
       goes to IPv4 receive handler and backlogged.
       From backlog it always goes here. Kerboom...
       Fortunately, dccp_rcv_established and rcv_established
       handle them correctly, but it is not case with
       dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
     */
    if (skb->protocol == htons(ETH_P_IP))
        return dccp_v4_do_rcv(sk, skb);

    if (sk_filter(sk, skb))
        goto discard;

    /*
     * socket locking is here for SMP purposes as backlog rcv is currently
     * called with bh processing disabled.
     */

    /* Do Stevens' IPV6_PKTOPTIONS.

       Yes, guys, it is the only place in our code, where we
       may make it not affecting IPv4.
       The rest of code is protocol independent,
       and I do not like idea to uglify IPv4.

       Actually, all the idea behind IPV6_PKTOPTIONS
       looks not very well thought. For now we latch
       options, received in the last packet, enqueued
       by tcp. Feel free to propose better solution.
                                              --ANK (980728)
     */
    if (np->rxopt.all)
        opt_skb = skb_clone(skb, GFP_ATOMIC);

    if (sk->sk_state == DCCP_OPEN) { /* Fast path */
        if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
            goto reset;
        if (opt_skb)
            goto ipv6_pktoptions;
        return 0;
    }

    /*
     *  Step 3: Process LISTEN state
     *     If S.state == LISTEN,
     *       If P.type == Request or P contains a valid Init Cookie option,
     *            (* Must scan the packet's options to check for Init
     *               Cookies.  Only Init Cookies are processed here,
     *               however; other options are processed in Step 8.  This
     *               scan need only be performed if the endpoint uses Init
     *               Cookies *)
     *            (* Generate a new socket and switch to that socket *)
     *            Set S := new socket for this port pair
     *            S.state = RESPOND
     *            Choose S.ISS (initial seqno) or set from Init Cookies
     *            Initialize S.GAR := S.ISS
     *            Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
     *            Continue with S.state == RESPOND
     *            (* A Response packet will be generated in Step 11 *)
     *       Otherwise,
     *            Generate Reset(No Connection) unless P.type == Reset
     *            Drop packet and return
     *
     * NOTE: the check for the packet types is done in
     *       dccp_rcv_state_process
     */

    if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
        goto reset;
    if (opt_skb)
        goto ipv6_pktoptions;
    return 0;

reset:
    dccp_v6_ctl_send_reset(sk, skb);
discard:
    if (opt_skb != NULL)
        __kfree_skb(opt_skb);
    kfree_skb(skb);
    return 0;

/* Handle the IPV6_PKTOPTIONS skb in a way similar to
 * net/ipv6/tcp_ipv6.c
 */
ipv6_pktoptions:
    if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
        if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
            np->mcast_oif = inet6_iif(opt_skb);
        if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
            np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
        if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
            np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
        if (np->repflow)
            np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
        if (ipv6_opt_accepted(sk, opt_skb,
                              &DCCP_SKB_CB(opt_skb)->header.h6)) {
            skb_set_owner_r(opt_skb, sk);
            memmove(IP6CB(opt_skb),
                    &DCCP_SKB_CB(opt_skb)->header.h6,
                    sizeof(struct inet6_skb_parm));
            opt_skb = xchg(&np->pktoptions, opt_skb);
        } else {
            __kfree_skb(opt_skb);
            opt_skb = xchg(&np->pktoptions, NULL);
        }
    }

    kfree_skb(opt_skb);
    return 0;
}