Example no. 1
void citp_waitable_init(ci_netif* ni, citp_waitable* w, int id)
{
  /* NB. Some members initialised in citp_waitable_obj_free(). */

  oo_p sp;

#if CI_CFG_SOCKP_IS_PTR
  w->bufid = id;
#else
  w->bufid = OO_SP_FROM_INT(ni, id);
#endif
  w->sb_flags = 0;
  w->sb_aflags = CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY;

  sp = oo_sockp_to_statep(ni, W_SP(w));
  OO_P_ADD(sp, CI_MEMBER_OFFSET(citp_waitable, post_poll_link));
  ci_ni_dllist_link_init(ni, &w->post_poll_link, sp, "ppll");
  ci_ni_dllist_self_link(ni, &w->post_poll_link);

  sp = oo_sockp_to_statep(ni, W_SP(w));
  OO_P_ADD(sp, CI_MEMBER_OFFSET(citp_waitable, ready_link));
  ci_ni_dllist_link_init(ni, &w->ready_link, sp, "rll");
  ci_ni_dllist_self_link(ni, &w->ready_link);

  w->lock.wl_val = 0;
  CI_DEBUG(w->wt_next = OO_SP_NULL);
  CI_DEBUG(w->next_id = CI_ILL_END);

  citp_waitable_reinit(ni, w);
}
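
The oo_sockp_to_statep()/OO_P_ADD() pair above computes a shared-memory-relative reference to each list link, because the stack state can be mapped at different virtual addresses in different processes, so raw pointers into it do not survive across mappings. A minimal standalone sketch of the idea follows; the types and helpers (rel_t, link_self(), waitable_link_init()) are illustrative stand-ins, not the real Onload API.

/* Sketch: reference a struct member by its offset from the base of a
 * shared region, so the reference stays valid in every process that
 * maps the region, wherever it lands.  Illustrative names only. */
#include <stddef.h>
#include <stdint.h>

typedef uint32_t rel_t;              /* offset from the region base */

struct link { rel_t next, prev; };   /* intrusive list node */

struct waitable {
  int bufid;
  struct link post_poll_link;
};

static void link_self(char* region_base, rel_t link_off)
{
  struct link* l = (struct link*)(region_base + link_off);
  l->next = l->prev = link_off;      /* self-linked == not on any list */
}

/* Analogue of the OO_P_ADD() + ci_ni_dllist_self_link() sequence. */
void waitable_link_init(char* region_base, rel_t w_off)
{
  rel_t sp = w_off + (rel_t)offsetof(struct waitable, post_poll_link);
  link_self(region_base, sp);
}
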
Example no. 2
void ci_sock_cmn_init(ci_netif* ni, ci_sock_cmn* s)
{
  oo_p sp;

  /* Poison. */
  CI_DEBUG(memset(&s->b + 1, 0xf0, (char*) (s + 1) - (char*) (&s->b + 1)));

  citp_waitable_reinit(ni, &s->b);
  oo_sock_cplane_init(&s->cp);
  s->local_peer = OO_SP_NULL;

  s->s_flags = CI_SOCK_FLAG_CONNECT_MUST_BIND | CI_SOCK_FLAG_PMTU_DO;
  s->s_aflags = 0u;

  ci_assert_equal( 0, CI_IP_DFLT_TOS );
  s->so_priority = 0;

  /* SO_SNDBUF & SO_RCVBUF.  See also ci_tcp_set_established_state() which
   * may modify these values.
   */
  memset(&s->so, 0, sizeof(s->so));
  s->so.sndbuf = NI_OPTS(ni).tcp_sndbuf_def;
  s->so.rcvbuf = NI_OPTS(ni).tcp_rcvbuf_def;

  s->rx_bind2dev_ifindex = CI_IFID_BAD;
  /* These don't really need to be initialised, as they are only
   * significant when rx_bind2dev_ifindex != CI_IFID_BAD.  But it makes
   * stackdump output cleaner this way...
   */
  s->rx_bind2dev_base_ifindex = 0;
  s->rx_bind2dev_vlan = 0;

  s->cmsg_flags = 0u;
  s->timestamping_flags = 0u;
  s->os_sock_status = OO_OS_STATUS_TX;

  ci_ip_queue_init(&s->timestamp_q);
  s->timestamp_q_extract = OO_PP_NULL;


  ci_sock_cmn_reinit(ni, s);

  sp = oo_sockp_to_statep(ni, SC_SP(s));
  OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_sock_cmn, reap_link));
  ci_ni_dllist_link_init(ni, &s->reap_link, sp, "reap");
  ci_ni_dllist_self_link(ni, &s->reap_link);
}
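
The CI_DEBUG poison at the top of the function fills everything after the embedded citp_waitable with 0xf0, so reads of uninitialised members stand out in debug builds. The pointer arithmetic generalises to any struct whose leading member is initialised separately; a minimal standalone sketch with hypothetical struct names:

#include <string.h>

struct base { int id; };             /* initialised elsewhere */

struct sock {
  struct base b;
  int flags;                         /* poisoned from here onwards */
  int prio;
};

static void poison_tail(struct sock* s)
{
  /* &s->b + 1 is the first byte after the embedded base member;
   * (char*)(s + 1) is one past the end of the whole struct. */
  memset(&s->b + 1, 0xf0, (char*)(s + 1) - (char*)(&s->b + 1));
}
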
Example no. 3
ci_inline ci_tcp_state*
get_ts_from_cache(ci_netif *netif, 
                  ci_tcp_state_synrecv* tsr, 
                  ci_tcp_socket_listen* tls)
{
  ci_tcp_state *ts = NULL;
#if CI_CFG_FD_CACHING
  if( ci_ni_dllist_not_empty(netif, &tls->epcache.cache) ) {
    /* Take the entry from the cache */
    ci_ni_dllist_link *link = ci_ni_dllist_pop(netif, &tls->epcache.cache);
    ts = CI_CONTAINER (ci_tcp_state, epcache_link, link);
    ci_assert (ts);
    ci_ni_dllist_self_link(netif, &ts->epcache_link);

    LOG_EP(ci_log("Taking cached fd %d off cached list, (onto acceptq)",
           ts->cached_on_fd));

    if( tcp_laddr_be32(ts) == tsr->l_addr ) {
      ci_tcp_state_init(netif, ts, 1);
      /* Shouldn't have touched these bits of state */
      ci_assert(!(ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
      ci_assert(ci_tcp_is_cached(ts));

      CITP_STATS_NETIF(++netif->state->stats.sockcache_hit);
      CITP_STATS_TCP_LISTEN(++tls->stats.n_sockcache_hit);
    }
    else {
      /* Oh dear -- the tcp-state we cached was using a different local IP
       * address.  This means we've accepted a connection on a different
       * interface than the one the cached state used, so we can't share
       * the hardware filter after all.  For now, just bung it back on
       * the list.
       */
      LOG_EP(ci_log("changed interface of cached EP, re-queueing"));
      ci_ni_dllist_push_tail(netif, &tls->epcache.cache, &ts->epcache_link);
      ts = NULL;
      CITP_STATS_NETIF(++netif->state->stats.sockcache_miss_intmismatch);
    }
  }
#endif
  return ts;
}
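
CI_CONTAINER() recovers the enclosing ci_tcp_state from a pointer to its embedded epcache_link, the classic container_of idiom for intrusive lists. A minimal standalone sketch of the pop-and-recover pattern (all names hypothetical):

#include <stddef.h>

#define CONTAINER(type, member, ptr) \
  ((type*)((char*)(ptr) - offsetof(type, member)))

struct dlist { struct dlist *next, *prev; };

struct tcp_state {
  int cached_on_fd;
  struct dlist epcache_link;         /* the state itself sits on the list */
};

static struct dlist* dlist_pop(struct dlist* head)
{
  struct dlist* l = head->next;
  head->next = l->next;
  l->next->prev = head;
  l->next = l->prev = l;             /* self-link: marks "not on a list" */
  return l;
}

static struct tcp_state* cache_take(struct dlist* cache)
{
  if( cache->next == cache )         /* an empty list links to itself */
    return NULL;
  return CONTAINER(struct tcp_state, epcache_link, dlist_pop(cache));
}
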
Example no. 4
/* run any pending timers */
void ci_ip_timer_poll(ci_netif *netif)
{
  ci_ip_timer_state* ipts = IPTIMER_STATE(netif); 
  ci_iptime_t* stime = &ipts->sched_ticks;
  ci_ip_timer* ts;
  ci_iptime_t rtime;
  ci_ni_dllist_link* link;
  int changed = 0;

  /* The caller is expected to ensure that the current time is sufficiently
  ** up-to-date.
  */
  rtime = ci_ip_time_now(netif);
  /* check for sanity i.e. time always goes forwards */
  ci_assert( TIME_GE(rtime, *stime) );

  /* Bug chasing for Bug 2855: check that the temp list used is OK before we start */
  ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l) );
  ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));

  while( TIME_LT(*stime, rtime) ) {

    DETAILED_CHECK_TIMERS(netif);

    /* advance the scheduler's view of time */
    (*stime)++;

    /* cascade through wheels if reached end of current wheel */
    if( BUCKETNO(0, *stime) == 0 ) {
      if( BUCKETNO(1, *stime) == 0 ) {
        if( BUCKETNO(2, *stime) == 0 ) {
          ci_ip_timer_cascadewheel(netif, 3, *stime);
        }
        ci_ip_timer_cascadewheel(netif, 2, *stime);
      }
      changed = ci_ip_timer_cascadewheel(netif, 1, *stime);
    }


    /* Bug 1828: We need to be careful here ... because:
        - ci_ip_timer_docallback can set/clear timers
        - the timers being set/cleared may not necessarily be the ones firing
        - however, they could be in this bucket
       In summary, we need to ensure the ni_dllist stays valid at all times
       so it is safe to call.  Slightly complicated by the fact that it's
       not possible to hold indirected linked lists on the stack */
    ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l));
    ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));

    /* run timers in the current bucket */
    ci_ni_dllist_rehome( netif,
                         &ipts->fire_list,
                         &ipts->warray[BUCKETNO(0, *stime)] );
    DETAILED_CHECK_TIMERS(netif);

    while( (link = ci_ni_dllist_try_pop(netif, &ipts->fire_list)) ) {

      ts = LINK2TIMER(link);

      ci_assert_equal(ts->time, *stime);

      /* ensure time marked as NOT pending */
      ci_ni_dllist_self_link(netif, &ts->link);

      /* callback safe to set/clear this or other timers */
      ci_ip_timer_docallback(netif, ts);
    }
    ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l) );
    ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));

    DETAILED_CHECK_TIMERS(netif);
  }
  
  ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l) );
  ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));

  /* What is our next timer?
   * Update if our previous "closest" timer has already been handled,
   * or if the previous estimate was "infinity". */
  if( TIME_GE(ipts->sched_ticks, ipts->closest_timer) ||
      (changed &&
       ipts->closest_timer - ipts->sched_ticks > IPTIME_INFINITY_LOW) ) {
    /* we peek into the first wheel only */
    ci_iptime_t base = ipts->sched_ticks & WHEEL0_MASK;
    ci_iptime_t b = ipts->sched_ticks - base;
    for( b++ ; b < CI_IPTIME_BUCKETS; b++ ) {
      if( !ci_ni_dllist_is_empty(netif, &ipts->warray[b]) ) {
        ipts->closest_timer = base + b;
        return;
      }
    }

    /* We do not know the next timer.  Set it to a sort of infinity. */
    ipts->closest_timer = ipts->sched_ticks + IPTIME_INFINITY;
  }
}
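
The cascade test works because BUCKETNO() extracts one bit-field of the tick count per wheel: when a lower-order field wraps to zero, the next wheel up holds timers that must be redistributed downwards. A minimal sketch of the bucket arithmetic for a multi-level wheel; the constants and helper names are illustrative, not Onload's:

#include <stdint.h>

typedef uint32_t iptime_t;

#define WHEEL_BITS  8                             /* 256 buckets per wheel */
#define BUCKET_NO(wheel, t) \
  (((t) >> ((wheel) * WHEEL_BITS)) & ((1u << WHEEL_BITS) - 1))

static void cascade_wheel(int wheel, iptime_t t)
{
  /* Re-bucket every timer in bucket BUCKET_NO(wheel, t) of this wheel
   * into the wheel below, keyed by its expiry time.  Body omitted. */
  (void)wheel; (void)t;
}

static void tick(iptime_t t)
{
  if( BUCKET_NO(0, t) == 0 ) {                    /* wheel 0 wrapped */
    if( BUCKET_NO(1, t) == 0 ) {
      if( BUCKET_NO(2, t) == 0 )
        cascade_wheel(3, t);
      cascade_wheel(2, t);
    }
    cascade_wheel(1, t);
  }
  /* ...then run the timers in wheel 0, bucket BUCKET_NO(0, t). */
}
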
/* Move the priv file to the alien_ni stack.
 * Must be called with the priv stack and socket locked;
 * the function returns with that stack unlocked.
 * If rc=0, it returns with the alien_ni stack locked;
 * otherwise both stacks are unlocked.
 * The socket is always unlocked on return. */
int efab_file_move_to_alien_stack(ci_private_t *priv, ci_netif *alien_ni)
{
  tcp_helper_resource_t *old_thr = priv->thr;
  tcp_helper_resource_t *new_thr = netif2tcp_helper_resource(alien_ni);
  ci_sock_cmn *old_s = SP_TO_SOCK(&old_thr->netif, priv->sock_id);
  ci_sock_cmn *new_s;
  ci_sock_cmn *mid_s;
  tcp_helper_endpoint_t *old_ep, *new_ep;
  int rc, i;
  int pollwait_register = 0;
#if CI_CFG_FD_CACHING
  oo_p sp;
#endif

  OO_DEBUG_TCPH(ci_log("%s: move %d:%d to %d", __func__,
                       old_thr->id, priv->sock_id, new_thr->id));
  /* Poll the old stack - deliver all data to our socket */
  ci_netif_poll(&old_thr->netif);

  /* Endpoints in an epoll list should not be moved, because the waitq is
   * already linked into epoll's internal structures (bug 41152). */
  if( !list_empty(&priv->_filp->f_ep_links) ) {
    rc = -EBUSY;
    goto fail1;
  }

  if( !efab_file_move_supported(&old_thr->netif, old_s) ) {
    rc = -EINVAL;
    goto fail1;
  }

  /* Lock the second stack */
  i = 0;
  while( ! ci_netif_trylock(alien_ni) ) {
    ci_netif_unlock(&old_thr->netif);
    if( i++ >= 1000 ) {
      rc = -EBUSY;
      goto fail1_ni_unlocked;
    }
    rc = ci_netif_lock(&old_thr->netif);
    if( rc != 0 )
      goto fail1_ni_unlocked;
  }

  /* Allocate a new socket in the alien_ni stack */
  rc = -ENOMEM;
  if( old_s->b.state == CI_TCP_STATE_UDP ) {
    ci_udp_state *new_us = ci_udp_get_state_buf(alien_ni);
    if( new_us == NULL )
      goto fail2;
    new_s = &new_us->s;
  }
  else {
    ci_tcp_state *new_ts = ci_tcp_get_state_buf(alien_ni);
    if( new_ts == NULL )
      goto fail2;
    new_s = &new_ts->s;
  }

  /* Allocate an intermediate "socket" outside of everything */
  mid_s = ci_alloc(CI_MAX(sizeof(ci_tcp_state), sizeof(ci_udp_state)));
  if( mid_s == NULL )
    goto fail3;

  OO_DEBUG_TCPH(ci_log("%s: move %d:%d to %d:%d", __func__,
                       old_thr->id, priv->sock_id,
                       new_thr->id, new_s->b.bufid));

  /* Copy TCP/UDP state */
  memcpy(mid_s, old_s, CI_MAX(sizeof(ci_tcp_state), sizeof(ci_udp_state)));

  /* Do not copy old_s->b.bufid
   * and other fields in the stack's address space */
  mid_s->b.sb_aflags |= CI_SB_AFLAG_ORPHAN;
  mid_s->b.bufid = new_s->b.bufid;
  mid_s->b.post_poll_link = new_s->b.post_poll_link;
  mid_s->b.ready_link = new_s->b.ready_link;
  mid_s->reap_link = new_s->reap_link;

  if( old_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    ci_tcp_state *mid_ts = SOCK_TO_TCP(mid_s);

    mid_ts->timeout_q_link = new_ts->timeout_q_link;
    mid_ts->tx_ready_link = new_ts->tx_ready_link;
    mid_ts->rto_tid = new_ts->rto_tid;
    mid_ts->delack_tid = new_ts->delack_tid;
    mid_ts->zwin_tid = new_ts->zwin_tid;
    mid_ts->kalive_tid = new_ts->kalive_tid;
    mid_ts->cork_tid = new_ts->cork_tid;
    ci_ip_queue_init(&mid_ts->recv1);
    ci_ip_queue_init(&mid_ts->recv2);
    ci_ip_queue_init(&mid_ts->send);
    ci_ip_queue_init(&mid_ts->retrans);
    mid_ts->send_prequeue = OO_PP_ID_NULL;
    mid_ts->retrans_ptr = OO_PP_NULL;  /* must target mid_ts: new_ts is
                                        * overwritten by the struct copy
                                        * below */
    mid_ts->tmpl_head = OO_PP_NULL;
    oo_atomic_set(&mid_ts->send_prequeue_in, 0);

    *new_ts = *mid_ts;
    ci_pmtu_state_init(alien_ni, &new_ts->s, &new_ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
#if CI_CFG_FD_CACHING
    sp = TS_OFF(alien_ni, new_ts);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_state, epcache_link));
    ci_ni_dllist_link_init(alien_ni, &new_ts->epcache_link, sp, "epch");
    ci_ni_dllist_self_link(alien_ni, &new_ts->epcache_link);
    sp = TS_OFF(alien_ni, new_ts);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_state, epcache_fd_link));
    ci_ni_dllist_link_init(alien_ni, &new_ts->epcache_fd_link, sp, "ecfd");
    ci_ni_dllist_self_link(alien_ni, &new_ts->epcache_fd_link);
#endif
   
    /* free temporary mid_ts storage */
    CI_FREE_OBJ(mid_ts);
  }
  else {
    ci_udp_state *mid_us = SOCK_TO_UDP(mid_s);

    *SOCK_TO_UDP(new_s) = *mid_us;
    CI_FREE_OBJ(mid_us);
  }

  /* Move the filter */
  old_ep = ci_trs_ep_get(old_thr, priv->sock_id);
  new_ep = ci_trs_ep_get(new_thr, new_s->b.bufid);
  rc = tcp_helper_endpoint_move_filters_pre(old_ep, new_ep);
  if( rc != 0 ) {
    rc = -EINVAL;
    goto fail3;
  }

  /* Allocate a new file for the new endpoint */
  rc = onload_alloc_file(new_thr, new_s->b.bufid, priv->_filp->f_flags,
                         priv->fd_type, &old_ep->alien_ref);
  if( rc != 0 )
    goto fail4;
  ci_assert(old_ep->alien_ref);

  /* Copy F_SETOWN_EX, F_SETSIG to the new file */
#ifdef F_SETOWN_EX
  rcu_read_lock();
  __f_setown(old_ep->alien_ref->_filp, priv->_filp->f_owner.pid,
             priv->_filp->f_owner.pid_type, 1);
  rcu_read_unlock();
#endif
  old_ep->alien_ref->_filp->f_owner.signum = priv->_filp->f_owner.signum;
  old_ep->alien_ref->_filp->f_flags |= priv->_filp->f_flags & O_NONBLOCK;

  /* Move os_socket from one ep to another */
  if( tcp_helper_endpoint_set_aflags(new_ep, OO_THR_EP_AFLAG_ATTACHED) &
      OO_THR_EP_AFLAG_ATTACHED ) {
    fput(old_ep->alien_ref->_filp);
    rc = -EBUSY;
    goto fail2; /* state & filters are cleared by fput() */
  }

  /********* Point of no return  **********/
  ci_wmb();
  priv->fd_type = CI_PRIV_TYPE_ALIEN_EP;
  priv->_filp->f_op = &linux_tcp_helper_fops_alien;
  ci_wmb();
  oo_file_moved(priv);

  /* Read all already-arrived packets after the filters move but before
   * copying of the receive queue. */
  ci_netif_poll(&old_thr->netif);
  tcp_helper_endpoint_move_filters_post(old_ep, new_ep);
  ci_assert( efab_file_move_supported(&old_thr->netif, old_s));

  /* There's a gap between un-registering the old ep and registering the
   * new.  However, the notifications shouldn't be in use for sockets
   * that are in a state that can be moved, so this shouldn't be a problem.
   */
  if( old_ep->os_sock_pt.whead ) {
    pollwait_register = 1;
    efab_tcp_helper_os_pollwait_unregister(old_ep);
  }
  ci_assert_equal(new_ep->os_socket, NULL);
  new_ep->os_socket = oo_file_ref_xchg(&old_ep->os_socket, NULL);
  ci_assert_equal(old_ep->os_socket, NULL);
  if( pollwait_register )
    efab_tcp_helper_os_pollwait_register(new_ep);

  ci_bit_clear(&new_s->b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
  if( new_s->b.state == CI_TCP_ESTABLISHED )
    CI_TCP_STATS_INC_CURR_ESTAB(alien_ni);


  /* Copy recv queue */
  if( new_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    ci_tcp_state *old_ts = SOCK_TO_TCP(old_s);
    int i;

    /* Stop timers */
    ci_ip_timer_clear(&old_thr->netif, &old_ts->kalive_tid);
    ci_ip_timer_clear(&old_thr->netif, &old_ts->delack_tid);

    efab_ip_queue_copy(alien_ni, &new_ts->recv1,
                       &old_thr->netif, &old_ts->recv1);
    efab_ip_queue_copy(alien_ni, &new_ts->recv2,
                       &old_thr->netif, &old_ts->recv2);
    new_ts->recv1_extract = new_ts->recv1.head;

    /* Drop reorder buffer */
    ci_ip_queue_init(&new_ts->rob);
    new_ts->dsack_block = OO_PP_INVALID;
    new_ts->dsack_start = new_ts->dsack_end = 0;
    for( i = 0; i <= CI_TCP_SACK_MAX_BLOCKS; i++ )
      new_ts->last_sack[i] = OO_PP_NULL;
  }
  else {
    /* There should not be any recv q, but drop it to be sure */
    ci_udp_recv_q_init(&SOCK_TO_UDP(new_s)->recv_q);
  }

  /* Old stack can be unlocked */
  old_s->b.sb_flags |= CI_SB_FLAG_MOVED;
  ci_netif_unlock(&old_thr->netif);

  ci_assert( efab_file_move_supported(alien_ni, new_s) );

  /* Move done: poll for any new data. */
  ci_netif_poll(alien_ni);

  if( new_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    /* Timers setup: delack, keepalive */
    if( (new_ts->acks_pending & CI_TCP_ACKS_PENDING_MASK) > 0)
      ci_tcp_timeout_delack(alien_ni, new_ts);
    ci_tcp_kalive_reset(alien_ni, new_ts);
  }


  /* Old ep: we are done. */
  ci_bit_set(&old_s->b.sb_aflags, CI_SB_AFLAG_MOVED_AWAY_BIT);
  old_s->b.moved_to_stack_id = alien_ni->state->stack_id;
  old_s->b.moved_to_sock_id = new_s->b.bufid;
  if( ! list_empty(&priv->_filp->f_ep_links) )
    ci_bit_set(&old_s->b.sb_aflags, CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL_BIT);

  ci_sock_unlock(&old_thr->netif, &old_s->b);
  ci_sock_unlock(alien_ni, &new_s->b);
  ci_assert(ci_netif_is_locked(alien_ni));
  OO_DEBUG_TCPH(ci_log("%s: -> [%d:%d] %s", __func__,
                       new_thr->id, new_s->b.bufid,
                       ci_tcp_state_str(new_s->b.state)));
  return 0;

fail4:
  /* We clear the filters from the new ep.
   * For now, we do not need to re-insert the old filters because the hw
   * filters are already in place (in the case of an accepted socket) or
   * are not needed.  We have not removed the old sw filters yet. */
  tcp_helper_endpoint_move_filters_undo(old_ep, new_ep);
fail3:
  if( new_s->b.state & CI_TCP_STATE_TCP )
    ci_tcp_state_free(alien_ni, SOCK_TO_TCP(new_s));
  else
    ci_udp_state_free(alien_ni, SOCK_TO_UDP(new_s));
fail2:
  ci_netif_unlock(alien_ni);
fail1:
  ci_netif_unlock(&old_thr->netif);
fail1_ni_unlocked:
  ci_sock_unlock(&old_thr->netif, &old_s->b);
  OO_DEBUG_TCPH(ci_log("%s: rc=%d", __func__, rc));
  return rc;
}
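
The lock-both-stacks loop near the top of the function deserves a note: spinning on the alien stack's lock while holding the old stack's would risk deadlock against a thread moving a socket in the opposite direction, so each trylock failure drops and retakes the old lock, and the retry count is bounded. A minimal sketch of the same pattern using pthreads (structure only; names hypothetical):

#include <errno.h>
#include <pthread.h>

/* Acquire two mutexes without a global lock order: with a held,
 * trylock b; on failure release a so whoever holds b can progress,
 * then retake a and retry.  Bounded retries turn persistent contention
 * into -EBUSY instead of livelock.  Returns 0 with both locks held,
 * otherwise an error with neither held. */
int lock_pair(pthread_mutex_t* a /* held on entry */, pthread_mutex_t* b)
{
  int i = 0;
  while( pthread_mutex_trylock(b) != 0 ) {
    pthread_mutex_unlock(a);
    if( i++ >= 1000 )
      return -EBUSY;
    if( pthread_mutex_lock(a) != 0 )
      return -EINVAL;
  }
  return 0;
}
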