Example 1
int oo_spinloop_run_pending_sigs(ci_netif* ni, citp_waitable* w,
                                 citp_signal_info* si, int have_timeout)
{
  int inside_lib;
  if( have_timeout )
    return -EINTR;
  if( w )
    ci_sock_unlock(ni, w);
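  /* Temporarily mark the thread as outside the library so that deferred
   * signals can run now; the compiler barriers order the flag updates
   * around the handler invocation. */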
  inside_lib = si->inside_lib;
  si->inside_lib = 0;
  ci_compiler_barrier();
  citp_signal_run_pending(si);
  si->inside_lib = inside_lib;
  ci_compiler_barrier();
  if( w )
    ci_sock_lock(ni, w);
  if( ~si->aflags & OO_SIGNAL_FLAG_NEED_RESTART )
    /* The handler did not set NEED_RESTART, so the interrupted call must
     * not be restarted: report the interruption to the caller. */
    return -EINTR;
  return 0;
}
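
The shape of this helper (drop the per-socket lock, run the deferred signal work, re-acquire the lock before returning) generalises to any design where callbacks must not run under a fine-grained lock. Below is a minimal sketch of the same pattern using POSIX threads; all names are illustrative and none of them are Onload APIs.

#include <pthread.h>

/* Illustrative stand-ins for the real per-socket lock and the deferred
 * signal work; not part of the Onload API. */
static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER;

static void run_deferred_work(void)
{
  /* e.g. run signal handlers queued while the lock was held */
}

static int work_requested_restart(void)
{
  return 1;
}

/* Drop the lock, run callbacks that must not execute under it, then
 * re-acquire it so the caller's locking invariant is preserved.
 * Returns 0 to keep going, -1 if the caller should bail out. */
int run_pending_work_outside_lock(void)
{
  pthread_mutex_unlock(&sock_lock);  /* callbacks may take other locks */
  run_deferred_work();
  pthread_mutex_lock(&sock_lock);    /* restore the invariant on return */
  return work_requested_restart() ? 0 : -1;
}
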
Example 2
static int ci_tcp_shutdown_listen(citp_socket* ep, int how, ci_fd_t fd)
{
  ci_tcp_socket_listen* tls = SOCK_TO_TCP_LISTEN(ep->s);

  if( how == SHUT_WR )
    return 0;

  ci_sock_lock(ep->netif, &tls->s.b);
  ci_netif_lock(ep->netif);
  LOG_TC(ci_log(SK_FMT" shutdown(SHUT_RD)", SK_PRI_ARGS(ep)));
  __ci_tcp_listen_shutdown(ep->netif, tls, fd);
  __ci_tcp_listen_to_normal(ep->netif, tls);
  {
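    /* Fetch the backing OS socket and clear O_NONBLOCK on it, so that
     * subsequent operations via the OS path block as normal. */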
    ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
    int flags = ci_sys_fcntl(os_sock, F_GETFL);
    flags &= (~O_NONBLOCK);
    CI_TRY(ci_sys_fcntl(os_sock, F_SETFL, flags));
    ci_rel_os_sock_fd(os_sock);
  }
  ci_netif_unlock(ep->netif);
  ci_sock_unlock(ep->netif, &tls->s.b);
  return 0;
}
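
Note the ordering discipline above: the per-socket lock is taken before the stack-wide netif lock and released after it. A self-contained sketch of the same two-level hierarchy with pthreads (illustrative names, not Onload APIs):

#include <pthread.h>

/* Lock hierarchy: always sock before netif; release in reverse order. */
static pthread_mutex_t sock_lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t netif_lock = PTHREAD_MUTEX_INITIALIZER;

static void shutdown_listener(void)
{
  /* state change protected by both locks */
}

void locked_shutdown(void)
{
  pthread_mutex_lock(&sock_lock);    /* outer, per-socket lock first */
  pthread_mutex_lock(&netif_lock);   /* then the inner, stack-wide lock */
  shutdown_listener();
  pthread_mutex_unlock(&netif_lock); /* release in strict reverse order */
  pthread_mutex_unlock(&sock_lock);
}
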
Example 3
int efab_file_move_to_alien_stack_rsop(ci_private_t *stack_priv, void *arg)
{
  ci_fixed_descriptor_t sock_fd = *(ci_fixed_descriptor_t *)arg;
  struct file *sock_file = fget(sock_fd);
  ci_private_t *sock_priv;
  tcp_helper_resource_t *old_thr;
  tcp_helper_resource_t *new_thr;
  citp_waitable *w;
  int rc;

  if( sock_file == NULL )
    return -EINVAL;
  if( !FILE_IS_ENDPOINT_SOCK(sock_file) ||
      stack_priv->fd_type != CI_PRIV_TYPE_NETIF ) {
    fput(sock_file);
    return -EINVAL;
  }
  sock_priv = sock_file->private_data;
  ci_assert(sock_priv->fd_type == CI_PRIV_TYPE_TCP_EP ||
            sock_priv->fd_type == CI_PRIV_TYPE_UDP_EP);

  old_thr = sock_priv->thr;
  new_thr = stack_priv->thr;
  ci_assert(old_thr);
  ci_assert(new_thr);

  if( old_thr == new_thr ) {
    fput(sock_file);
    return 0;
  }

  if( tcp_helper_cluster_from_cluster(old_thr) != 0 ) {
    LOG_S(ci_log("%s: move_fd() not permitted on clustered stacks", __func__));
    fput(sock_file);
    return -EINVAL;
  }

  w = SP_TO_WAITABLE(&old_thr->netif, sock_priv->sock_id);
  rc = ci_sock_lock(&old_thr->netif, w);
  if( rc != 0 ) {
    fput(sock_file);
    return rc;
  }

  rc = ci_netif_lock(&old_thr->netif);
  if( rc != 0 ) {
    ci_sock_unlock(&old_thr->netif, w);
    fput(sock_file);
    return rc;
  }

  efab_thr_ref(new_thr);
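  /* Locking note: the old_thr sock and netif locks taken above are not
   * released on any path below, so they are presumably dropped inside
   * efab_file_move_to_alien_stack(). */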
  rc = efab_file_move_to_alien_stack(sock_priv, &stack_priv->thr->netif);
  fput(sock_file);

  if( rc != 0 )
    efab_thr_release(new_thr);
  else
    ci_netif_unlock(&new_thr->netif);

  return rc;
}
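
Every early return above pairs the error with an fput() of the reference taken at entry, and the new_thr reference is dropped again if the move fails. A minimal sketch of the underlying get/put discipline using C11 atomics (hypothetical struct, not the Onload types):

#include <stdatomic.h>
#include <stdlib.h>

/* A minimal get/put reference count in the spirit of the fget()/fput()
 * and efab_thr_ref()/efab_thr_release() pairs above; illustrative only. */
struct obj {
  atomic_int refcnt;   /* starts at 1, held by the creating owner */
};

static void obj_get(struct obj* o)
{
  atomic_fetch_add(&o->refcnt, 1);
}

static void obj_put(struct obj* o)
{
  /* fetch_sub returns the previous value: 1 means we dropped the last
   * reference and must free the object. */
  if( atomic_fetch_sub(&o->refcnt, 1) == 1 )
    free(o);
}
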
Example 4
static int ci_udp_ioctl_locked(ci_netif* ni, ci_udp_state* us,
                               ci_fd_t fd, int request, void* arg)
{
  int rc;

  switch( request ) {
  case FIONREAD: /* synonym of SIOCINQ */
    if( ! CI_IOCTL_ARG_OK(int, arg) )
      return -EFAULT;
    rc = 1;
#if CI_CFG_ZC_RECV_FILTER
# ifndef __KERNEL__
    if( us->recv_q_filter ) {
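      /* The lock order is sock before netif, so drop the netif lock
       * before taking the sock lock, then re-take it afterwards. */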
      ci_netif_unlock(ni);
      ci_sock_lock(ni, &us->s.b);
      rc = ci_udp_recv_q_readable(ni, us);
      ci_sock_unlock(ni, &us->s.b);
      ci_netif_lock(ni);
    }
# endif
#endif
    if( rc ) {
      /* Return the size of the datagram at the head of the receive queue.
       *
       * Careful: the extract side of the receive queue is owned by the
       * sock lock, which we don't hold.  However, freeing of buffers is
       * owned by the netif lock, which we do hold.  So we're safe as long
       * as we read [extract] only once.
       */
      oo_pkt_p extract = us->recv_q.extract;
      if( OO_PP_NOT_NULL(extract) ) {
        ci_ip_pkt_fmt* pkt = PKT_CHK(ni, extract);
        if( (pkt->pf.udp.rx_flags & CI_IP_PKT_FMT_PREFIX_UDP_RX_CONSUMED) &&
            OO_PP_NOT_NULL(pkt->next) )
          pkt = PKT_CHK(ni, pkt->next);
        if( !(pkt->pf.udp.rx_flags & CI_IP_PKT_FMT_PREFIX_UDP_RX_CONSUMED) ) {
          *(int*) arg = pkt->pf.udp.pay_len;
          return 0;
        }
      }
    }
    /* Nothing in the user-level receive queue, so take the value returned
     * by the OS socket.
     */
    if( !(us->s.os_sock_status & OO_OS_STATUS_RX) ) {
      *(int*)arg = 0;
      return 0;
    }
    goto sys_ioctl;

  case TIOCOUTQ: /* synonym of SIOCOUTQ */
    if( ! CI_IOCTL_ARG_OK(int, arg) )
      return -EFAULT;

    *(int*)arg = us->tx_count + oo_atomic_read(&us->tx_async_q_level);
    return 0;

  case SIOCGSTAMP:
#if defined( __linux__) && defined(__KERNEL__)
/* The following code assumes the width of the timespec and timeval fields */
# error "Need to consider 32-on-64 bit setting of timeval arg" 
#endif
    if( ! (us->udpflags & CI_UDPF_LAST_RECV_ON) )
      return oo_os_sock_ioctl(ni, us->s.b.bufid, request, arg, NULL);
    return ci_udp_ioctl_siocgstamp(ni, us, arg, 1);
  case SIOCGSTAMPNS:
    if( ! (us->udpflags & CI_UDPF_LAST_RECV_ON) )
      return oo_os_sock_ioctl(ni, us->s.b.bufid, request, arg, NULL);
    return ci_udp_ioctl_siocgstamp(ni, us, arg, 0);
  }

  return ci_udp_ioctl_slow(ni, us, fd, request, arg);

 sys_ioctl:
  return oo_os_sock_ioctl(ni, us->s.b.bufid, request, arg, NULL);
}
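
The "read [extract] once" rule in the FIONREAD branch is the single-snapshot discipline for peeking at state owned by a lock you do not hold: load the shared pointer into a local exactly once and consult only that local. A sketch in C11 (illustrative types; the real code additionally relies on the netif lock to keep buffers from being freed):

#include <stdatomic.h>
#include <stddef.h>

/* Illustrative packet type; not the Onload packet structures. */
struct pkt {
  int pay_len;
};

/* Queue head whose extract side is owned by a lock we do not hold.
 * Reclamation of packets is assumed to be prevented by a lock we DO
 * hold, so dereferencing a snapshot is safe. */
static _Atomic(struct pkt*) extract_head;

int peek_payload_len(void)
{
  struct pkt* p = atomic_load(&extract_head);  /* read the pointer once */
  /* Work only with the local snapshot: re-reading extract_head could
   * observe a different packet and mix fields from two packets. */
  return p != NULL ? p->pay_len : 0;
}
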
Example 5
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /*
  ** TODO: error handling on possible failures is not yet robust.
  ** TODO: check that the port number is valid.
  */

  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__, SK_PRI_ARGS(ep), 
             backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;
  if( ! NI_OPTS(netif).tcp_server_loopback ) {
    /* We should handover if the socket is bound to alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  if( backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  if( s->b.state == CI_TCP_LISTEN ) {
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }


  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if socket used for a previous, failed, connect then the error
   * numbers will not be as expected.  Only seen when not using listening
   * netifs (as moving the EP to the new netif resets them).
   */
  ts->s.tx_errno = EPIPE;
  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( !(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose 
     * a port.  As we haven't been given a hint we let the OS choose. */

    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    if (CI_LIKELY( rc==0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));

    } else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  } 

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);
  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* setup listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on error path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* Install all the filters needed for this connection
   *    - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   *  TODO: handle BINDTODEVICE by setting the phys_port parameter to the
   *        correct physical L5 port index
   *  TODO: handle REUSEADDR by setting the last parameter to TRUE
   */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    if( rc == -EFILTERSSOME ) {
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters  returned %d", __FUNCTION__, rc));
    if (rc < 0) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }


  /*
   * A call to the system listen() is required for listen-any, for local-host
   * communication servers, and for multi-homed servers (to accept connections
   * to L5-assigned addresses that arrive via other interfaces).
   */
#ifdef __ci_driver__
  rc = efab_tcp_helper_listen_os_sock(netif2tcp_helper_resource(netif),
                                      S_SP(tls), backlog);
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if ( rc < 0 ) {
    /* clear the filter we've just set */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* revert TCP state to a non-listening socket format */
  __ci_tcp_listen_to_normal(netif, tls);
  /* Above function sets orphan flag but we are attached to an FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}
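
The post_listen_fail and listen_fail labels undo the completed set-up steps in reverse order. A sketch of this staged-unwind idiom with hypothetical helpers (not the Onload functions):

/* Hypothetical helpers standing in for the listen set-up steps above. */
static int set_listen_state(void)        { return 0; }
static void revert_to_normal_state(void) { }
static int install_filters(void)         { return 0; }
static void clear_filters(void)          { }
static int listen_on_os_socket(void)     { return 0; }

/* Set-up steps run in order; on failure the unwind labels run the
 * inverses of the completed steps in reverse order. */
int start_listening(void)
{
  int rc;

  if( (rc = set_listen_state()) != 0 )     /* step 1 */
    return rc;
  if( (rc = install_filters()) != 0 )      /* step 2 */
    goto undo_state;
  if( (rc = listen_on_os_socket()) != 0 )  /* step 3 */
    goto undo_filters;
  return 0;

 undo_filters:
  clear_filters();
 undo_state:
  revert_to_normal_state();
  return rc;
}
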
Example 6
/* This function must be called with netif lock not held and it always
 * returns with the netif lock not held.
 */
int efab_tcp_helper_reuseport_bind(ci_private_t *priv, void *arg)
{
  oo_tcp_reuseport_bind_t* trb = arg;
  ci_netif* ni = &priv->thr->netif;
  tcp_helper_cluster_t* thc;
  tcp_helper_resource_t* thr = NULL;
  citp_waitable* waitable;
  ci_sock_cmn* sock = SP_TO_SOCK(ni, priv->sock_id);
  struct oof_manager* fm = efab_tcp_driver.filter_manager;
  struct oof_socket* oofilter;
  struct oof_socket dummy_oofilter;
  int protocol = thc_get_sock_protocol(sock);
  char name[CI_CFG_CLUSTER_NAME_LEN + 1];
  int rc, rc1;
  int flags = 0;
  tcp_helper_cluster_t* named_thc,* ported_thc;
  int alloced = 0;

  /* No clustering on sockets bound to alien addresses */
  if( sock->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
    return 0;

  if( NI_OPTS(ni).cluster_ignore == 1 ) {
    LOG_NV(ci_log("%s: Ignored attempt to use clusters due to "
                  "EF_CLUSTER_IGNORE option.", __FUNCTION__));
    return 0;
  }

  if( trb->port_be16 == 0 ) {
    ci_log("%s: Reuseport on port=0 is not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( trb->cluster_size < 2 ) {
    ci_log("%s: Cluster sizes < 2 are not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( sock->s_flags & (CI_SOCK_FLAG_TPROXY | CI_SOCK_FLAG_MAC_FILTER) ) {
    ci_log("%s: Scalable filter sockets cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  oofilter = &ci_trs_ep_get(priv->thr, priv->sock_id)->oofilter;

  if( oofilter->sf_local_port != NULL ) {
    ci_log("%s: Socket that already have filter cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  if( priv->thr->thc ) {
    /* Reserve proto:port[:ip] until bind (or close). */
    rc = oof_socket_add(fm, oofilter,
                        OOF_SOCKET_ADD_FLAG_CLUSTERED |
                        OOF_SOCKET_ADD_FLAG_DUMMY,
                        protocol, trb->addr_be32, trb->port_be16, 0, 0,
                        &ported_thc);
    if( rc > 0 )
      rc = 0;
    if( rc == 0 )
      sock->s_flags |= CI_SOCK_FLAG_FILTER;
    return rc;
  }

  mutex_lock(&thc_init_mutex);
  /* We are going to be iterating over clusters, make sure they don't
   * change.
   */
  mutex_lock(&thc_mutex);

  /* Lookup a suitable cluster to use */

  /* We try to add a dummy filter to oof to reserve the proto:port[:ip]
   * tuple; if a cluster already exists at that tuple, we get a reference
   * to it.
   */
  oof_socket_ctor(&dummy_oofilter);
  rc = oof_socket_add(fm, &dummy_oofilter,
                      OOF_SOCKET_ADD_FLAG_CLUSTERED |
                      OOF_SOCKET_ADD_FLAG_DUMMY |
                      OOF_SOCKET_ADD_FLAG_NO_STACK,
                      protocol, trb->addr_be32, trb->port_be16, 0, 0,
                      &ported_thc);
  if( rc < 0 ) /* non-clustered socket on the tuple */
    goto alloc_fail0;

  if( ! gen_cluster_name(trb->cluster_name, name) ) {
    /* The user requested a cluster by name, but we need to make sure that
     * the oof_local_port the user is interested in is not being used by
     * another cluster.  We search for the cluster by name and cross-check
     * the result against the prior proto:port[:ip] oof_local_port lookup.
     */
    rc1 = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                             &named_thc);
    if( rc1 < 0 ) {
      rc = rc1;
      goto alloc_fail;
    }

    if( rc1 == 0 ) {
      if( rc == 1 ) {
        /* search by oof_local_port found a cluster which search by
         * name didn't find. */
        LOG_E(ci_log("Error: Cluster with requested name %s already "
                     "bound to %s", name, ported_thc->thc_name));
        rc = -EEXIST;
        goto alloc_fail;
      }
      else {
        /* Neither search found a cluster, so allocate one below. */
      }
    }
    else {
      if( rc == 1 ) {
        /* Both searches found clusters.  Fine if they are the same or
         * else error. */
        if( named_thc != ported_thc ) {
          LOG_E(ci_log("Error: Cluster %s does not handle socket %s:%d.  "
                       "Cluster %s does", name, FMT_PROTOCOL(protocol),
                       trb->port_be16, named_thc->thc_name));
          rc = -EEXIST;
          goto alloc_fail;
        }
      }
      /* Search by name found a cluster, and there is no conflict with the
       * search by tuple (the ported cluster is either absent or the same
       * as the named one). */
      thc = named_thc;
      goto cont;
    }
  }
  else {
    /* No cluster name was requested.  We have already looked for a cluster
     * handling the tuple.  If none was found, try to use an existing cluster
     * this process created; failing that, allocate one.
     *
     * If rc == 0, no cluster was found by tuple, so try the other routes.
     * If rc == 1, we found one: check that the euids match and continue. */
    if( rc == 1 ) {
      thc = ported_thc;
      if( thc->thc_euid != ci_geteuid() ) {
        rc = -EADDRINUSE;
        goto alloc_fail;
      }
      goto cont;
    }
    rc = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                            &thc);
    if( rc < 0 )
      goto alloc_fail;
    if( rc == 1 )
      goto cont;
  }
  /* When an interface is in tproxy mode, all clustered listening sockets
   * are assumed to be part of the tproxy passive side.  This requires the
   * RSS context to use altered RSS hashing based solely on src ip:port.
   */
  flags = tcp_helper_cluster_thc_flags(&NI_OPTS(ni));

  if( (rc = thc_alloc(name, protocol, trb->port_be16, ci_geteuid(),
                      trb->cluster_size, flags, &thc)) != 0 )
    goto alloc_fail;

  alloced = 1;

 cont:
  tcp_helper_cluster_ref(thc);

  /* At this point we have our cluster with one additional reference */

  /* Find a suitable stack within the cluster to use */
  rc = thc_get_thr(thc, &dummy_oofilter, &thr);
  if( rc != 0 )
    rc = thc_alloc_thr(thc, trb->cluster_restart_opt,
                       &ni->opts, ni->flags, &thr);

  /* If get or alloc succeeded thr holds reference to the cluster,
   * so the cluster cannot go away.  We'll drop our reference and also
   * will not be accessing state within the cluster anymore so we can
   * drop the lock. */
  mutex_unlock(&thc_mutex);

  if( alloced && rc == 0 && (flags & THC_FLAG_TPROXY) != 0 ) {
    /* The tproxy filter is allocated this late because it must be preceded
     * by stack allocation (the firmware needs an initialised VI). */
    rc = thc_install_tproxy(thc, NI_OPTS(ni).scalable_filter_ifindex);
    if( rc != 0 )
      efab_thr_release(thr);
  }

  tcp_helper_cluster_release(thc, NULL);

  if( rc != 0 ) {
    oof_socket_del(fm, &dummy_oofilter);
    goto alloc_fail_unlocked;
  }

  /* We have thr and we hold single reference to it. */

  /* Move the socket into the new stack */
  if( (rc = ci_netif_lock(ni)) != 0 )
    goto drop_and_done;
  waitable = SP_TO_WAITABLE(ni, priv->sock_id);
  rc = ci_sock_lock(ni, waitable);
  if( rc != 0 ) {
    ci_netif_unlock(ni);
    goto drop_and_done;
  }
  /* thr referencing scheme comes from efab_file_move_to_alien_stack_rsop */
  efab_thr_ref(thr);
  rc = efab_file_move_to_alien_stack(priv, &thr->netif, 0);
  if( rc != 0 )
    efab_thr_release(thr);
  else {
    /* Besides us, the socket now holds its own reference to thr. */
    oofilter = &ci_trs_ep_get(thr, sock->b.moved_to_sock_id)->oofilter;
    oof_socket_replace(fm, &dummy_oofilter, oofilter);
    SP_TO_SOCK(&thr->netif, sock->b.moved_to_sock_id)->s_flags |= CI_SOCK_FLAG_FILTER;
    ci_netif_unlock(&thr->netif);
  }

 drop_and_done:
  if( rc != 0 )
    oof_socket_del(fm, &dummy_oofilter);
  /* Drop the reference we got from thc_get_thr() or thc_alloc_thr().
   * If things went wrong, both the stack and the cluster might disappear. */
  efab_thr_release(thr);
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;

 alloc_fail:
  oof_socket_del(fm, &dummy_oofilter);
 alloc_fail0:
  mutex_unlock(&thc_mutex);
 alloc_fail_unlocked:
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;
}
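
dummy_oofilter acts as a placeholder reservation: the proto:port[:ip] tuple is claimed up front under the cluster locks, then either handed over to the moved socket's real filter via oof_socket_replace() or deleted on every failure path. The reservation idiom in miniature (hypothetical names, not the oof API):

#include <stddef.h>

/* A slot that can be reserved with a placeholder and later promoted to
 * its real owner, in the spirit of dummy_oofilter above. */
struct slot {
  void* owner;           /* NULL = free; else placeholder or real owner */
};

static int placeholder;  /* its address serves as the placeholder token */

int slot_reserve(struct slot* s)
{
  if( s->owner != NULL )
    return -1;              /* tuple already taken */
  s->owner = &placeholder;  /* claim it before the slow work starts */
  return 0;
}

void slot_promote(struct slot* s, void* real_owner)
{
  s->owner = real_owner;    /* hand the reservation to the real socket */
}

void slot_cancel(struct slot* s)
{
  s->owner = NULL;          /* every failure path releases the reservation */
}
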