예제 #1
0
static int
efab_tcp_helper_stack_attach(ci_private_t* priv, void *arg)
{
  oo_stack_attach_t* op = arg;
  tcp_helper_resource_t* trs = priv->thr;
  int rc;

  if( trs == NULL ) {
    LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__));
    return -EINVAL;
  }
  OO_DEBUG_TCPH(ci_log("%s: [%d]", __FUNCTION__, NI_ID(&trs->netif)));

  rc = oo_create_stack_fd(trs);
  if( rc < 0 ) {
    OO_DEBUG_ERR(ci_log("%s: oo_create_stack_fd failed (%d)",
                        __FUNCTION__, rc));
    return rc;
  }
  op->fd = rc;

  /* Re-read the OS socket buffer size settings.  This ensures we'll use
   * up-to-date values for this new socket.
   */
  efab_get_os_settings(&NI_OPTS_TRS(trs));
  op->out_nic_set = trs->netif.nic_set;
  op->out_map_size = trs->mem_mmap_bytes;
  return 0;
}
예제 #2
0
static int
efab_tcp_helper_pipe_attach(ci_private_t* priv, void *arg)
{
  oo_pipe_attach_t* op = arg;
  tcp_helper_resource_t* trs = priv->thr;
  tcp_helper_endpoint_t* ep = NULL;
  int rc;

  OO_DEBUG_TCPH(ci_log("%s: ep_id=%d", __FUNCTION__, op->ep_id));
  if( trs == NULL ) {
    LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__));
    return -EINVAL;
  }

  /* Validate and find the endpoint. */
  if( ! IS_VALID_SOCK_P(&trs->netif, op->ep_id) )
    return -EINVAL;
  ep = ci_trs_get_valid_ep(trs, op->ep_id);
  if( tcp_helper_endpoint_set_aflags(ep, OO_THR_EP_AFLAG_ATTACHED) &
      OO_THR_EP_AFLAG_ATTACHED )
    return -EBUSY;

  rc = oo_create_fd(ep, op->flags, CI_PRIV_TYPE_PIPE_READER);
  if( rc < 0 ) {
    tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
    return rc;
  }
  op->rfd = rc;

  rc = oo_create_fd(ep, op->flags, CI_PRIV_TYPE_PIPE_WRITER);
  if( rc < 0 ) {
    efab_linux_sys_close(op->rfd);
    tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
    return rc;
  }
  op->wfd = rc;

  return 0;
}
예제 #3
0
/* Move priv file to the alien_ni stack.
 * Should be called with the locked priv stack and socket;
 * the function returns with this stack being unlocked.
 * If rc=0, it returns with alien_ni stack locked;
 * otherwise, both stacks are unlocked.
 * Socket is always unlocked on return. */
int efab_file_move_to_alien_stack(ci_private_t *priv, ci_netif *alien_ni)
{
  tcp_helper_resource_t *old_thr = priv->thr;
  tcp_helper_resource_t *new_thr = netif2tcp_helper_resource(alien_ni);
  ci_sock_cmn *old_s = SP_TO_SOCK(&old_thr->netif, priv->sock_id);
  ci_sock_cmn *new_s;
  ci_sock_cmn *mid_s;
  tcp_helper_endpoint_t *old_ep, *new_ep;
  int rc, i;
  int pollwait_register = 0;
#if CI_CFG_FD_CACHING
  oo_p sp;
#endif

  OO_DEBUG_TCPH(ci_log("%s: move %d:%d to %d", __func__,
                       old_thr->id, priv->sock_id, new_thr->id));
  /* Poll the old stack - deliver all data to our socket */
  ci_netif_poll(&old_thr->netif);

  /* Endpoints in epoll list should not be moved, because waitq is already
   * in the epoll internal structures (bug 41152). */
  if( !list_empty(&priv->_filp->f_ep_links) ) {
    rc = -EBUSY;
    goto fail1;
  }

  if( !efab_file_move_supported(&old_thr->netif, old_s) ) {
    rc = -EINVAL;
    goto fail1;
  }

  /* Lock the second stack */
  i = 0;
  while( ! ci_netif_trylock(alien_ni) ) {
    ci_netif_unlock(&old_thr->netif);
    if( i++ >= 1000 ) {
      rc = -EBUSY;
      goto fail1_ni_unlocked;
    }
    rc = ci_netif_lock(&old_thr->netif);
    if( rc != 0 )
      goto fail1_ni_unlocked;
  }

  /* Allocate a new socket in the alien_ni stack */
  rc = -ENOMEM;
  if( old_s->b.state == CI_TCP_STATE_UDP ) {
    ci_udp_state *new_us = ci_udp_get_state_buf(alien_ni);
    if( new_us == NULL )
      goto fail2;
    new_s = &new_us->s;
  }
  else {
    ci_tcp_state *new_ts = ci_tcp_get_state_buf(alien_ni);
    if( new_ts == NULL )
      goto fail2;
    new_s = &new_ts->s;
  }

  /* Allocate an intermediate "socket" outside of everything */
  mid_s = ci_alloc(CI_MAX(sizeof(ci_tcp_state), sizeof(ci_udp_state)));
  if( mid_s == NULL )
    goto fail3;

  OO_DEBUG_TCPH(ci_log("%s: move %d:%d to %d:%d", __func__,
                       old_thr->id, priv->sock_id,
                       new_thr->id, new_s->b.bufid));

  /* Copy TCP/UDP state */
  memcpy(mid_s, old_s, CI_MAX(sizeof(ci_tcp_state), sizeof(ci_udp_state)));

  /* do not copy old_s->b.bufid
   * and other fields in stack adress space */
  mid_s->b.sb_aflags |= CI_SB_AFLAG_ORPHAN;
  mid_s->b.bufid = new_s->b.bufid;
  mid_s->b.post_poll_link = new_s->b.post_poll_link;
  mid_s->b.ready_link = new_s->b.ready_link;
  mid_s->reap_link = new_s->reap_link;

  if( old_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    ci_tcp_state *mid_ts = SOCK_TO_TCP(mid_s);

    mid_ts->timeout_q_link = new_ts->timeout_q_link;
    mid_ts->tx_ready_link = new_ts->tx_ready_link;
    mid_ts->rto_tid = new_ts->rto_tid;
    mid_ts->delack_tid = new_ts->delack_tid;
    mid_ts->zwin_tid = new_ts->zwin_tid;
    mid_ts->kalive_tid = new_ts->kalive_tid;
    mid_ts->cork_tid = new_ts->cork_tid;
    ci_ip_queue_init(&mid_ts->recv1);
    ci_ip_queue_init(&mid_ts->recv2);
    ci_ip_queue_init(&mid_ts->send);
    ci_ip_queue_init(&mid_ts->retrans);
    mid_ts->send_prequeue = OO_PP_ID_NULL;
    new_ts->retrans_ptr = OO_PP_NULL;
    mid_ts->tmpl_head = OO_PP_NULL;
    oo_atomic_set(&mid_ts->send_prequeue_in, 0);

    *new_ts = *mid_ts;
    ci_pmtu_state_init(alien_ni, &new_ts->s, &new_ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
#if CI_CFG_FD_CACHING
    sp = TS_OFF(alien_ni, new_ts);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_state, epcache_link));
    ci_ni_dllist_link_init(alien_ni, &new_ts->epcache_link, sp, "epch");
    ci_ni_dllist_self_link(alien_ni, &new_ts->epcache_link);
    sp = TS_OFF(alien_ni, new_ts);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_state, epcache_fd_link));
    ci_ni_dllist_link_init(alien_ni, &new_ts->epcache_fd_link, sp, "ecfd");
    ci_ni_dllist_self_link(alien_ni, &new_ts->epcache_fd_link);
#endif
   
    /* free temporary mid_ts storage */
    CI_FREE_OBJ(mid_ts);
  }
  else {
    ci_udp_state *mid_us = SOCK_TO_UDP(mid_s);

    *SOCK_TO_UDP(new_s) = *mid_us;
    CI_FREE_OBJ(mid_us);
  }

  /* Move the filter */
  old_ep = ci_trs_ep_get(old_thr, priv->sock_id);
  new_ep = ci_trs_ep_get(new_thr, new_s->b.bufid);
  rc = tcp_helper_endpoint_move_filters_pre(old_ep, new_ep);
  if( rc != 0 ) {
    rc = -EINVAL;
    goto fail3;
  }

  /* Allocate a new file for the new endpoint */
  rc = onload_alloc_file(new_thr, new_s->b.bufid, priv->_filp->f_flags,
                         priv->fd_type, &old_ep->alien_ref);
  if( rc != 0 )
    goto fail4;
  ci_assert(old_ep->alien_ref);

  /* Copy F_SETOWN_EX, F_SETSIG to the new file */
#ifdef F_SETOWN_EX
  rcu_read_lock();
  __f_setown(old_ep->alien_ref->_filp, priv->_filp->f_owner.pid,
             priv->_filp->f_owner.pid_type, 1);
  rcu_read_unlock();
#endif
  old_ep->alien_ref->_filp->f_owner.signum = priv->_filp->f_owner.signum;
  old_ep->alien_ref->_filp->f_flags |= priv->_filp->f_flags & O_NONBLOCK;

  /* Move os_socket from one ep to another */
  if( tcp_helper_endpoint_set_aflags(new_ep, OO_THR_EP_AFLAG_ATTACHED) &
      OO_THR_EP_AFLAG_ATTACHED ) {
    fput(old_ep->alien_ref->_filp);
    rc = -EBUSY;
    goto fail2; /* state & filters are cleared by fput() */
  }

  /********* Point of no return  **********/
  ci_wmb();
  priv->fd_type = CI_PRIV_TYPE_ALIEN_EP;
  priv->_filp->f_op = &linux_tcp_helper_fops_alien;
  ci_wmb();
  oo_file_moved(priv);

  /* Read all already-arrived packets after the filters move but before
   * copying of the receive queue. */
  ci_netif_poll(&old_thr->netif);
  tcp_helper_endpoint_move_filters_post(old_ep, new_ep);
  ci_assert( efab_file_move_supported(&old_thr->netif, old_s));

  /* There's a gap between un-registering the old ep, and registering the
   * the new.  However, the notifications shouldn't be in use for sockets
   * that are in a state that can be moved, so this shouldn't be a problem.
   */
  if( old_ep->os_sock_pt.whead ) {
    pollwait_register = 1;
    efab_tcp_helper_os_pollwait_unregister(old_ep);
  }
  ci_assert_equal(new_ep->os_socket, NULL);
  new_ep->os_socket = oo_file_ref_xchg(&old_ep->os_socket, NULL);
  ci_assert_equal(old_ep->os_socket, NULL);
  if( pollwait_register )
    efab_tcp_helper_os_pollwait_register(new_ep);

  ci_bit_clear(&new_s->b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
  if( new_s->b.state == CI_TCP_ESTABLISHED )
    CI_TCP_STATS_INC_CURR_ESTAB(alien_ni);


  /* Copy recv queue */
  if( new_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    ci_tcp_state *old_ts = SOCK_TO_TCP(old_s);
    int i;

    /* Stop timers */
    ci_ip_timer_clear(&old_thr->netif, &old_ts->kalive_tid);
    ci_ip_timer_clear(&old_thr->netif, &old_ts->delack_tid);

    efab_ip_queue_copy(alien_ni, &new_ts->recv1,
                       &old_thr->netif, &old_ts->recv1);
    efab_ip_queue_copy(alien_ni, &new_ts->recv2,
                       &old_thr->netif, &old_ts->recv2);
    new_ts->recv1_extract = new_ts->recv1.head;

    /* Drop reorder buffer */
    ci_ip_queue_init(&new_ts->rob);
    new_ts->dsack_block = OO_PP_INVALID;
    new_ts->dsack_start = new_ts->dsack_end = 0;
    for( i = 0; i <= CI_TCP_SACK_MAX_BLOCKS; i++ )
      new_ts->last_sack[i] = OO_PP_NULL;
  }
  else {
    /* There should not be any recv q, but drop it to be sure */
    ci_udp_recv_q_init(&SOCK_TO_UDP(new_s)->recv_q);
  }

  /* Old stack can be unlocked */
  old_s->b.sb_flags |= CI_SB_FLAG_MOVED;
  ci_netif_unlock(&old_thr->netif);

  ci_assert( efab_file_move_supported(alien_ni, new_s) );

  /* Move done: poll for any new data. */
  ci_netif_poll(alien_ni);

  if( new_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    /* Timers setup: delack, keepalive */
    if( (new_ts->acks_pending & CI_TCP_ACKS_PENDING_MASK) > 0)
      ci_tcp_timeout_delack(alien_ni, new_ts);
    ci_tcp_kalive_reset(alien_ni, new_ts);
  }


  /* Old ep: we are done. */
  ci_bit_set(&old_s->b.sb_aflags, CI_SB_AFLAG_MOVED_AWAY_BIT);
  old_s->b.moved_to_stack_id = alien_ni->state->stack_id;
  old_s->b.moved_to_sock_id = new_s->b.bufid;
  if( ! list_empty(&priv->_filp->f_ep_links) )
    ci_bit_set(&old_s->b.sb_aflags, CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL_BIT);

  ci_sock_unlock(&old_thr->netif, &old_s->b);
  ci_sock_unlock(alien_ni, &new_s->b);
  ci_assert(ci_netif_is_locked(alien_ni));
  OO_DEBUG_TCPH(ci_log("%s: -> [%d:%d] %s", __func__,
                       new_thr->id, new_s->b.bufid,
                       ci_tcp_state_str(new_s->b.state)));
  return 0;

fail4:
  /* We clear the filters from the new ep.
   * For now, we do not need to re-insert old filters because hw filters
   * are alredy here (in case of accepted socket) or not needed.
   * We have not removed old sw filters yet. */
  tcp_helper_endpoint_move_filters_undo(old_ep, new_ep);
fail3:
  if( new_s->b.state & CI_TCP_STATE_TCP )
    ci_tcp_state_free(alien_ni, SOCK_TO_TCP(new_s));
  else
    ci_udp_state_free(alien_ni, SOCK_TO_UDP(new_s));
fail2:
  ci_netif_unlock(alien_ni);
fail1:
  ci_netif_unlock(&old_thr->netif);
fail1_ni_unlocked:
  ci_sock_unlock(&old_thr->netif, &old_s->b);
  OO_DEBUG_TCPH(ci_log("%s: rc=%d", __func__, rc));
  return rc;
}
예제 #4
0
static int
efab_tcp_helper_move_state(ci_private_t* priv, void *arg)
{
  oo_tcp_move_state_t *op = arg;
  tcp_helper_endpoint_t *new_ep;
  tcp_helper_resource_t * new_trs = NULL;
  ci_netif* ni, *new_ni;
  ci_tcp_state * ts, *new_ts;
  tcp_helper_endpoint_t* ep;
  int rc = efab_ioctl_get_ep(priv, op->ep_id, &ep);
  if (rc != 0)
    return rc;

  OO_DEBUG_TCPH(ci_log("%s: (trs=%p (%u), priv=%p, ep_id=%u, new_trs_id=%u, "
                       "new_ep_id=%u", __FUNCTION__, priv->thr, priv->thr->id,
                       priv, OO_SP_FMT(op->ep_id), op->new_trs_id,
                       OO_SP_FMT(op->new_ep_id)));

  do {
    /* check that the existing id is valid */
    ni = &priv->thr->netif;
    ts = SP_TO_TCP(ni, ep->id);

    /* TODO: check this endpoint belongs to the tcp helper resource of priv and not
     * somewhere else */
    
    /* this function does not change fd_type or fd ops, so it is not able
     * to cope with changing the socket type. We think this only makes sense
     * for TCP, so assert we are taking a TCP endpoint.
     */
    ci_assert_equal(ts->s.pkt.ip.ip_protocol, IPPROTO_TCP);
    ci_assert_equal(priv->fd_type, CI_PRIV_TYPE_TCP_EP);

    /* get pointer to resource from handle - increments ref count */
    rc = efab_thr_table_lookup(NULL, op->new_trs_id,
                               EFAB_THR_TABLE_LOOKUP_CHECK_USER, &new_trs);
    if (rc < 0) {
      OO_DEBUG_ERR( ci_log("%s: invalid new resource handle", __FUNCTION__) );
      break;
    }
    ci_assert(new_trs != NULL);
    /* check valid endpoint in new netif */
    new_ni = &new_trs->netif;
    new_ep = ci_netif_get_valid_ep(new_ni, op->new_ep_id);
    new_ts = SP_TO_TCP(new_ni, new_ep->id);

    /* check the two endpoint states look valid */
    if( (ts->s.pkt.ip.ip_protocol != new_ts->s.pkt.ip.ip_protocol) ||
        (ts->s.b.state != CI_TCP_CLOSED) ||
        (ep->oofilter.sf_local_port != NULL) ) {
      efab_thr_release(new_trs);
      rc = -EINVAL;
      OO_DEBUG_ERR(ci_log("%s: invalid endpoint states", __FUNCTION__));
      break;
    }

    /* should be fine to complete */
    ci_assert(new_trs);
    {
      tcp_helper_resource_t *old_trs;
    again:
      old_trs = priv->thr;
      if (ci_cas_uintptr_fail((ci_uintptr_t *)&priv->thr,
                              (ci_uintptr_t)old_trs, (ci_uintptr_t)new_trs))
        goto again;
      efab_thr_release(old_trs);
    }

    /* move file to hold details of new resource, new endpoint */
    ci_assert(OO_SP_EQ(priv->sock_id, op->ep_id));
    priv->sock_id = new_ep->id;

    OO_DEBUG_TCPH(ci_log("%s: set epid %u", __FUNCTION__,
                         OO_SP_FMT(priv->sock_id)));
    
    /* copy across any necessary state */

    ci_assert_equal(new_ep->os_socket, NULL);
    new_ep->os_socket = ep->os_socket;
    ep->os_socket = NULL;

    /* set ORPHAN flag in current as not attached to an FD */
    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
    /* remove ORPHAN flag in new TCP state */
    ci_atomic32_and(&new_ts->s.b.sb_aflags,
		    ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));

    return 0;

  } while (0);

  return rc;

}
예제 #5
0
static int
efab_tcp_helper_sock_attach(ci_private_t* priv, void *arg)
{
  oo_sock_attach_t* op = arg;
  tcp_helper_resource_t* trs = priv->thr;
  tcp_helper_endpoint_t* ep = NULL;
  citp_waitable_obj *wo;
  int rc, flags, type = op->type;

/* SOCK_CLOEXEC and SOCK_NONBLOCK exist from 2.6.27 both */
#ifdef SOCK_TYPE_MASK
  BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
  flags = type & (SOCK_CLOEXEC | SOCK_NONBLOCK);
  type &= SOCK_TYPE_MASK;
# ifdef SOCK_NONBLOCK
    if( SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK) )
      flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
# endif
#else
  flags = 0;
#endif

  OO_DEBUG_TCPH(ci_log("%s: ep_id=%d", __FUNCTION__, op->ep_id));
  if( trs == NULL ) {
    LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__));
    return -EINVAL;
  }

  /* Validate and find the endpoint. */
  if( ! IS_VALID_SOCK_P(&trs->netif, op->ep_id) )
    return -EINVAL;
  ep = ci_trs_get_valid_ep(trs, op->ep_id);
  if( tcp_helper_endpoint_set_aflags(ep, OO_THR_EP_AFLAG_ATTACHED) &
      OO_THR_EP_AFLAG_ATTACHED )
    return -EBUSY;
  wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id);

  /* create OS socket */
  if( op->domain != AF_UNSPEC ) {
    struct socket *sock;
    struct file *os_file;

    rc = sock_create(op->domain, type, 0, &sock);
    if( rc < 0 ) {
      LOG_E(ci_log("%s: ERROR: sock_create(%d, %d, 0) failed (%d)",
                   __FUNCTION__, op->domain, type, rc));
      tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
      return rc;
    }
    os_file = sock_alloc_file(sock, flags, NULL);
    if( IS_ERR(os_file) ) {
      LOG_E(ci_log("%s: ERROR: sock_alloc_file failed (%ld)",
                   __FUNCTION__, PTR_ERR(os_file)));
      sock_release(sock);
      tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
      return PTR_ERR(os_file);
    }
    rc = efab_attach_os_socket(ep, os_file);
    if( rc < 0 ) {
      LOG_E(ci_log("%s: ERROR: efab_attach_os_socket failed (%d)",
                   __FUNCTION__, rc));
      /* NB. efab_attach_os_socket() consumes [os_file] even on error. */
      tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
      return rc;
    }
    wo->sock.domain = op->domain;
    wo->sock.ino = ep->os_socket->file->f_dentry->d_inode->i_ino;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
    wo->sock.uid = ep->os_socket->file->f_dentry->d_inode->i_uid;
#else
    wo->sock.uid = __kuid_val(ep->os_socket->file->f_dentry->d_inode->i_uid);
#endif
  }

  /* Create a new file descriptor to attach the stack to. */
  ci_assert((wo->waitable.state & CI_TCP_STATE_TCP) ||
            wo->waitable.state == CI_TCP_STATE_UDP);
  rc = oo_create_fd(ep, flags,
                    (wo->waitable.state & CI_TCP_STATE_TCP) ?
                    CI_PRIV_TYPE_TCP_EP : CI_PRIV_TYPE_UDP_EP);
  if( rc < 0 ) {
    ci_irqlock_state_t lock_flags;
    struct oo_file_ref* os_socket;
    ci_irqlock_lock(&ep->thr->lock, &lock_flags);
    os_socket = ep->os_socket;
    ep->os_socket = NULL;
    ci_irqlock_unlock(&ep->thr->lock, &lock_flags);
    if( os_socket != NULL )
      oo_file_ref_drop(os_socket);
    tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
    return rc;
  }

  op->fd = rc;
#ifdef SOCK_NONBLOCK
  if( op->type & SOCK_NONBLOCK )
    ci_bit_mask_set(&wo->waitable.sb_aflags, CI_SB_AFLAG_O_NONBLOCK);
#endif

  /* Re-read the OS socket buffer size settings.  This ensures we'll use
   * up-to-date values for this new socket.
   */
  efab_get_os_settings(&NI_OPTS_TRS(trs));
  return 0;
}