コード例 #1
0
ファイル: tcp_connect.c プロジェクト: davenso/openonload
/* check that we can handle this destination */
static int ci_tcp_connect_check_dest(citp_socket* ep, ci_ip_addr_t dst_be32,
                                     int dport_be16)
{
  ci_ip_cached_hdrs* ipcache = &ep->s->pkt;

  ipcache->ip.ip_daddr_be32 = dst_be32;
  ipcache->dport_be16 = dport_be16;
  cicp_user_retrieve(ep->netif, ipcache, &ep->s->cp);

  if(CI_LIKELY( ipcache->status == retrrc_success ||
                ipcache->status == retrrc_nomac   ||
                ipcache->status < 0 )) {
    /* Onloadable. */
    if( ipcache->encap.type & CICP_LLAP_TYPE_XMIT_HASH_LAYER4 )
      /* We don't yet have a local port number, so the result of that
       * lookup may be wrong.
       */
      ci_ip_cache_invalidate(ipcache);
    if( ipcache->ip.ip_saddr_be32 == 0 ) {
      /* Control plane has selected a source address for us -- remember it. */
      ipcache->ip.ip_saddr_be32 = ipcache->ip_saddr_be32;
      ep->s->cp.ip_laddr_be32 = ipcache->ip_saddr_be32;
    }
    return 0;
  }
  else if( ipcache->status == retrrc_localroute ) {
    ci_tcp_state* ts = SOCK_TO_TCP(ep->s);

    if( NI_OPTS(ep->netif).tcp_client_loopback == CITP_TCP_LOOPBACK_OFF)
      return CI_SOCKET_HANDOVER;

    ep->s->s_flags |= CI_SOCK_FLAG_BOUND_ALIEN;
    if( NI_OPTS(ep->netif).tcp_server_loopback != CITP_TCP_LOOPBACK_OFF )
      ts->local_peer = ci_tcp_connect_find_local_peer(ep->netif, dst_be32,
                                                      dport_be16);
    else
      ts->local_peer = OO_SP_NULL;

    if( OO_SP_NOT_NULL(ts->local_peer) ||
        NI_OPTS(ep->netif).tcp_client_loopback !=
        CITP_TCP_LOOPBACK_SAMESTACK ) {
      ipcache->flags |= CI_IP_CACHE_IS_LOCALROUTE;
      if( ipcache->ip.ip_saddr_be32 == 0 ) {
        ipcache->ip.ip_saddr_be32 = dst_be32;
        ep->s->cp.ip_laddr_be32 = dst_be32;
      }
      ipcache->ether_offset = 4; /* lo is non-VLAN */
      ipcache->ip_saddr_be32 = dst_be32;
      ipcache->dport_be16 = dport_be16;
      return 0;
    }
    return CI_SOCKET_HANDOVER;
  }

  return CI_SOCKET_HANDOVER;
}
コード例 #2
0
ファイル: udp.c プロジェクト: majek/openonload
/* initialise all the fields that we can in the UDP state structure.  
** There are no IP options, no destination addresses, no ports */
static void ci_udp_state_init(ci_netif* netif, ci_udp_state* us)
{
  ci_sock_cmn_init(netif, &us->s, 1);

  /* IP_MULTICAST_LOOP is 1 by default, so we should not send multicast
   * unless specially permitted */
  if( ! NI_OPTS(netif).force_send_multicast )
    us->s.cp.sock_cp_flags |= OO_SCP_NO_MULTICAST;

  /* Poison. */
  CI_DEBUG(memset(&us->s + 1, 0xf0, (char*) (us + 1) - (char*) (&us->s + 1)));

  /*! \todo This should be part of sock_cmn reinit, but the comment to that
   * function suggests that it's possibly not a good plan to move it there */

#if CI_CFG_TIMESTAMPING
  ci_udp_recv_q_init(&us->timestamp_q);
#endif

  /*! \todo These two should really be handled in ci_sock_cmn_init() */

  /* Make sure we don't hit any state assertions. Can use
   *  UDP_STATE_FROM_SOCKET_EPINFO() after this. */
  us->s.b.state = CI_TCP_STATE_UDP;

  us->s.so.sndbuf = NI_OPTS(netif).udp_sndbuf_def;
  us->s.so.rcvbuf = NI_OPTS(netif).udp_rcvbuf_def;

  /* Init the ip-caches (packet header templates). */
  ci_udp_hdrs_init(&us->s.pkt);
  ci_ip_cache_init(&us->ephemeral_pkt);
  ci_udp_hdrs_init(&us->ephemeral_pkt);
  udp_lport_be16(us) = 0;
  udp_rport_be16(us) = 0;

#if CI_CFG_ZC_RECV_FILTER
  us->recv_q_filter = 0;
  us->recv_q_filter_arg = 0;
#endif
  ci_udp_recv_q_init(&us->recv_q);
  us->zc_kernel_datagram = OO_PP_NULL;
  us->zc_kernel_datagram_count = 0;
  us->tx_async_q = CI_ILL_END;
  oo_atomic_set(&us->tx_async_q_level, 0);
  us->tx_count = 0;
  us->udpflags = CI_UDPF_MCAST_LOOP;
  us->ip_pktinfo_cache.intf_i = -1;
  us->stamp = 0;
  memset(&us->stats, 0, sizeof(us->stats));
}
コード例 #3
0
ファイル: socket.c プロジェクト: ido/openonload
void ci_sock_cmn_init(ci_netif* ni, ci_sock_cmn* s)
{
  oo_p sp;

  /* Poison. */
  CI_DEBUG(memset(&s->b + 1, 0xf0, (char*) (s + 1) - (char*) (&s->b + 1)));

  citp_waitable_reinit(ni, &s->b);
  oo_sock_cplane_init(&s->cp);
  s->local_peer = OO_SP_NULL;

  s->s_flags = CI_SOCK_FLAG_CONNECT_MUST_BIND | CI_SOCK_FLAG_PMTU_DO;
  s->s_aflags = 0u;

  ci_assert_equal( 0, CI_IP_DFLT_TOS );
  s->so_priority = 0;

  /* SO_SNDBUF & SO_RCVBUF.  See also ci_tcp_set_established_state() which
   * may modify these values.
   */
  memset(&s->so, 0, sizeof(s->so));
  s->so.sndbuf = NI_OPTS(ni).tcp_sndbuf_def;
  s->so.rcvbuf = NI_OPTS(ni).tcp_rcvbuf_def;

  s->rx_bind2dev_ifindex = CI_IFID_BAD;
  /* These don't really need to be initialised, as only significant when
   * rx_bind2dev_ifindex != CI_IFID_BAD.  But makes stackdump output
   * cleaner this way...
   */
  s->rx_bind2dev_base_ifindex = 0;
  s->rx_bind2dev_vlan = 0;

  s->cmsg_flags = 0u;
  s->timestamping_flags = 0u;
  s->os_sock_status = OO_OS_STATUS_TX;

  ci_ip_queue_init(&s->timestamp_q);
  s->timestamp_q_extract = OO_PP_NULL;


  ci_sock_cmn_reinit(ni, s);

  sp = oo_sockp_to_statep(ni, SC_SP(s));
  OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_sock_cmn, reap_link));
  ci_ni_dllist_link_init(ni, &s->reap_link, sp, "reap");
  ci_ni_dllist_self_link(ni, &s->reap_link);
}
コード例 #4
0
ファイル: tcp_init.c プロジェクト: davenso/openonload
ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type)
{
  ci_tcp_state* ts;
  ci_fd_t fd;

  ci_assert(ep);
  ci_assert(netif);

  ci_netif_lock(netif);
  ts = ci_tcp_get_state_buf(netif);
  if( ts == NULL ) {
    ci_netif_unlock(netif);
    LOG_E(ci_log("%s: [%d] out of socket buffers", __FUNCTION__,NI_ID(netif)));
    return -ENOMEM;
  }

  fd = ci_tcp_helper_sock_attach(ci_netif_get_driver_handle(netif), S_SP(ts),
                                 domain, type);
  if( fd < 0 ) {
    if( fd == -EAFNOSUPPORT )
      LOG_U(ci_log("%s: ci_tcp_helper_sock_attach" \
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
    else
      LOG_E(ci_log("%s: ci_tcp_helper_sock_attach" \
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
  }
  else {
    ci_assert(~ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN);
    /* Apply default sockbuf sizes now we've updated them from the kernel
    ** defaults. */
    ts->s.so.sndbuf = NI_OPTS(netif).tcp_sndbuf_def;
    ts->s.so.rcvbuf = NI_OPTS(netif).tcp_rcvbuf_def;
    ep->netif = netif;
    ep->s = &ts->s;
    CHECK_TEP(ep);
  }

  ci_netif_unlock(netif);
  return fd;
}
コード例 #5
0
ファイル: iptimer.c プロジェクト: majek/openonload
/* initialise the iptimer scheduler */
void ci_ip_timer_state_init(ci_netif* netif, unsigned cpu_khz)
{
  ci_ip_timer_state* ipts = IPTIMER_STATE(netif);
  int i;
  int us2isn;

  /* initialise the cycle to tick constants */
  ipts->khz = cpu_khz;
  ipts->ci_ip_time_frc2tick = shift_for_gran(CI_IP_TIME_APP_GRANULARITY, ipts->khz);
  ipts->ci_ip_time_frc2us = shift_for_gran(1, ipts->khz);

  /* The Linux kernel ticks the initial sequence number that it would use for
   * a given tuple every 64 ns.  Onload does the same, when using
   * EF_TCP_ISN_MODE=clocked. However in EF_TCP_ISN_MODE=clocked+cache our use
   * of the clock-driven ISN is slightly different, though, as we remember
   * old sequence numbers in the case where the clock-driven ISN is not known
   * to be safe.  As such, we don't need it to tick so fast, and so we let it
   * tick at most every 256 ns.  This means that it takes more than eight
   * minutes to wrap by half, while four minutes is our assumed maximum
   * peer-MSL.  This in practice reduces the cases in which we have to
   * remember old sequence numbers. */
  us2isn = NI_OPTS(netif).tcp_isn_mode != 0 ? 2 : 4;
  ipts->ci_ip_time_frc2isn = ipts->ci_ip_time_frc2us > us2isn ?
                             ipts->ci_ip_time_frc2us - us2isn : 0;

  ci_ip_time_initial_sync(ipts);
  ipts->sched_ticks = ci_ip_time_now(netif);
  ipts->closest_timer = ipts->sched_ticks + IPTIME_INFINITY;

  /* To convert ms to ticks we will use fixed point arithmetic
   * Calculate conversion factor, which is expected to be in range <0.5,1]
   * */
  ipts->ci_ip_time_ms2tick_fxp =
    (((ci_uint64)ipts->khz) << 32) /
    (1u << ipts->ci_ip_time_frc2tick);
  ci_assert_gt(ipts->ci_ip_time_ms2tick_fxp, 1ull<<31);
  ci_assert_le(ipts->ci_ip_time_ms2tick_fxp, 1ull<<32);

  /* set module specific time constants dependent on frc2tick */
  ci_tcp_timer_init(netif);

  ci_ni_dllist_init(netif, &ipts->fire_list,
		    oo_ptr_to_statep(netif, &ipts->fire_list),
                    "fire");
  
  /* Initialise the wheel lists. */
  for( i=0; i < CI_IPTIME_WHEELSIZE; i++)
    ci_ni_dllist_init(netif, &ipts->warray[i],
		      oo_ptr_to_statep(netif, &ipts->warray[i]),
                      "timw");
}
コード例 #6
0
ファイル: udp_connect.c プロジェクト: bmschwa/openonload
int ci_udp_should_handover(citp_socket* ep, const struct sockaddr* addr,
                           ci_uint16 lport)
{
    ci_uint32 addr_be32;

#if CI_CFG_FAKE_IPV6
    if( ep->s->domain == AF_INET6 && ! ci_tcp_ipv6_is_ipv4(addr) )
        goto handover;
#endif

    if( (CI_BSWAP_BE16(lport) >= NI_OPTS(ep->netif).udp_port_handover_min &&
            CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover_max) ||
            (CI_BSWAP_BE16(lport) >= NI_OPTS(ep->netif).udp_port_handover2_min &&
             CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover2_max) ||
            (CI_BSWAP_BE16(lport) >= NI_OPTS(ep->netif).udp_port_handover3_min &&
             CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover3_max) ) {
        LOG_UC(log(FNS_FMT "HANDOVER (%d <= %d <= %d)",
                   FNS_PRI_ARGS(ep->netif, ep->s),
                   NI_OPTS(ep->netif).udp_port_handover_min,
                   CI_BSWAP_BE16(lport),
                   NI_OPTS(ep->netif).udp_port_handover_max));
        goto handover;
    }

    addr_be32 = ci_get_ip4_addr(ep->s->domain, addr);
    if( addr_be32 != CI_BSWAPC_BE32(INADDR_ANY) &&
            ! cicp_user_addr_is_local_efab(CICP_HANDLE(ep->netif), &addr_be32) &&
            ! CI_IP_IS_MULTICAST(addr_be32) ) {
        /* Either the bind/getsockname indicated that we need to let the OS
          * take this or the local address is not one of ours - so we can safely
          * hand-over as bind to a non-ANY addr cannot be revoked.
          * The filters (if any) have already been removed, so we just get out. */
        goto handover;
    }

    return 0;
handover:
    return 1;
}
コード例 #7
0
ファイル: udp_connect.c プロジェクト: bmschwa/openonload
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, then it can be consider an internal error or failing of onload.
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
    const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
    ci_uint32 dst_be32;
    ci_udp_state* us = SOCK_TO_UDP(ep->s);
    int onloadable;
    int rc = 0;

    CHECK_UEP(ep);

    UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
    us->s.rx_errno = 0;
    us->s.tx_errno = 0;

    if( IS_DISCONNECTING(serv_sin) ) {
        rc = ci_udp_disconnect(ep, us, os_sock);
        goto out;
    }
#if CI_CFG_FAKE_IPV6
    if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
        LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
        goto handover;
    }
#endif

    dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);
    if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
        LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
                  FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                  CI_BSWAP_BE16(serv_sin->sin_port), errno));
        goto out;
    }

    us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
    ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
    cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

    switch( us->s.pkt.status ) {
    case retrrc_success:
    case retrrc_nomac:
        onloadable = 1;
        break;
    default:
        onloadable = 0;
        if( NI_OPTS(ep->netif).udp_connect_handover ) {
            LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                       ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
            goto handover;
        }
        break;
    }

    if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
        LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
                   FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                   CI_BSWAP_BE16(serv_sin->sin_port)));
        ci_udp_clr_filters(ep);
        return 0;
    }
    if( CI_IP_IS_LOOPBACK(dst_be32) ) {
        /* After connecting via loopback it is not possible to connect anywhere
         * else.
         */
        LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                   ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
        goto handover;
    }

    if( onloadable ) {
#ifdef ONLOAD_OFE
        if( ep->netif->ofe != NULL )
            us->s.ofe_code_start = ofe_socktbl_find(
                                       ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                       udp_laddr_be32(us), udp_raddr_be32(us),
                                       udp_lport_be16(us), udp_rport_be16(us));
#endif

        if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
            /* Failed to set filters.  Most likely we've run out of h/w filters.
             * Handover to O/S to avoid breaking the app.
             *
             * TODO: Actually we probably won't break the app if we don't
             * handover, as packets will still get delivered via the kernel
             * stack.  Might be worth having a runtime option to choose whether
             * or not to handover in such cases.
             */
            LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                      FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                      CI_BSWAP_BE16(serv_sin->sin_port), rc));
            CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
            goto out;
        }
    }
    else {
        ci_udp_clr_filters(ep);
    }

    LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
               SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
               ip_addr_str(udp_laddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
               ip_addr_str(udp_raddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
    return 0;

out:
    if( rc < 0 && CITP_OPTS.no_fail )
        goto handover;
    return rc;

handover:
    ci_udp_clr_filters(ep);
    return CI_SOCKET_HANDOVER;
}
コード例 #8
0
/* Locking policy:
 * Enterance: priv->thr->netif is assumed to be locked.
 * Exit: all stacks (the client stack and the listener's stack) are
 * unlocked.
 */
int efab_tcp_loopback_connect(ci_private_t *priv, void *arg)
{
  struct oo_op_loopback_connect *carg = arg;
  ci_netif *alien_ni = NULL;
  oo_sp tls_id;

  ci_assert(ci_netif_is_locked(&priv->thr->netif));
  carg->out_moved = 0;

  if( !CI_PRIV_TYPE_IS_ENDPOINT(priv->fd_type) )
    return -EINVAL;
  if( NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
      CITP_TCP_LOOPBACK_TO_CONNSTACK &&
      NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
      CITP_TCP_LOOPBACK_TO_LISTSTACK &&
      NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
      CITP_TCP_LOOPBACK_TO_NEWSTACK) {
    ci_netif_unlock(&priv->thr->netif);
    return -EINVAL;
  }

  while( iterate_netifs_unlocked(&alien_ni) == 0 ) {

    if( !efab_thr_can_access_stack(netif2tcp_helper_resource(alien_ni),
                                   EFAB_THR_TABLE_LOOKUP_CHECK_USER) )
      continue; /* no permission to look in here */

    if( NI_OPTS(alien_ni).tcp_server_loopback == CITP_TCP_LOOPBACK_OFF )
      continue; /* server does not accept loopback connections */

    if( NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
        CITP_TCP_LOOPBACK_TO_LISTSTACK &&
        NI_OPTS(alien_ni).tcp_server_loopback !=
        CITP_TCP_LOOPBACK_ALLOW_ALIEN_IN_ACCEPTQ )
      continue; /* options of the stacks to not match */

    if( NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
        CITP_TCP_LOOPBACK_TO_LISTSTACK &&
        !efab_thr_user_can_access_stack(alien_ni->uid, alien_ni->euid,
                                        &priv->thr->netif) )
      continue; /* server can't accept our socket */

    tls_id = ci_tcp_connect_find_local_peer(alien_ni, carg->dst_addr,
                                            carg->dst_port);

    if( OO_SP_NOT_NULL(tls_id) ) {
      int rc;

      /* We are going to exit in this or other way: get ref and
       * drop kref of alien_ni */
      efab_thr_ref(netif2tcp_helper_resource(alien_ni));
      iterate_netifs_unlocked_dropref(alien_ni);

      switch( NI_OPTS(&priv->thr->netif).tcp_client_loopback ) {
      case CITP_TCP_LOOPBACK_TO_CONNSTACK:
        /* connect_lo_toconn unlocks priv->thr->netif */
        carg->out_rc =
            ci_tcp_connect_lo_toconn(&priv->thr->netif, priv->sock_id,
                                     carg->dst_addr, alien_ni, tls_id);
        efab_thr_release(netif2tcp_helper_resource(alien_ni));
        return 0;

      case CITP_TCP_LOOPBACK_TO_LISTSTACK:
        /* Nobody should be using this socket, so trylock should succeed.
         * Overwise we hand over the socket and do not accelerate this
         * loopback connection. */
        rc = ci_sock_trylock(&priv->thr->netif,
                             SP_TO_WAITABLE(&priv->thr->netif,
                                            priv->sock_id));
        if( rc == 0 ) {
          ci_netif_unlock(&priv->thr->netif);
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          return -ECONNREFUSED;
        }

        /* move_to_alien changes locks - see comments near it */
        rc = efab_file_move_to_alien_stack(priv, alien_ni);
        if( rc != 0 ) {
          /* error - everything is already unlocked */
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          /* if we return error, UL will hand the socket over. */
          return rc;
        }
        /* now alien_ni is locked */

        /* Connect again, using new endpoint */
        carg->out_rc =
            ci_tcp_connect_lo_samestack(
                            alien_ni,
                            SP_TO_TCP(alien_ni,
                                      SP_TO_WAITABLE(&priv->thr->netif,
                                                     priv->sock_id)
                                      ->moved_to_sock_id),
                            tls_id);
        ci_netif_unlock(alien_ni);
        carg->out_moved = 1;
        return 0;


      case CITP_TCP_LOOPBACK_TO_NEWSTACK:
      {
        tcp_helper_resource_t *new_thr;
        ci_resource_onload_alloc_t alloc;

        /* create new stack
         * todo: no hardware interfaces are necessary */
        strcpy(alloc.in_version, ONLOAD_VERSION);
        strcpy(alloc.in_uk_intf_ver, oo_uk_intf_ver);
        alloc.in_name[0] = '\0';
        alloc.in_flags = 0;

        rc = tcp_helper_alloc_kernel(&alloc, &NI_OPTS(&priv->thr->netif), 0,
                                     &new_thr);
        if( rc != 0 ) {
          ci_netif_unlock(&priv->thr->netif);
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          return -ECONNREFUSED;
        }

        rc = ci_sock_trylock(&priv->thr->netif,
                             SP_TO_WAITABLE(&priv->thr->netif,
                                            priv->sock_id));
        if( rc == 0 ) {
          ci_netif_unlock(&priv->thr->netif);
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          efab_thr_release(new_thr);
          return -ECONNREFUSED;
        }

        /* move connecting socket to the new stack */
        rc = efab_file_move_to_alien_stack(priv, &new_thr->netif);
        if( rc != 0 ) {
          /* error - everything is already unlocked */
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          efab_thr_release(new_thr);
          return -ECONNREFUSED;
        }
        /* now new_thr->netif is locked */
        carg->out_moved = 1;
        carg->out_rc = -ECONNREFUSED;

        /* now connect via CITP_TCP_LOOPBACK_TO_CONNSTACK */
        /* connect_lo_toconn unlocks new_thr->netif */
        carg->out_rc =
            ci_tcp_connect_lo_toconn(
                            &new_thr->netif,
                            SP_TO_WAITABLE(&priv->thr->netif,
                                           priv->sock_id)->moved_to_sock_id,
                            carg->dst_addr, alien_ni, tls_id);
        efab_thr_release(netif2tcp_helper_resource(alien_ni));
        return 0;
      }
      }
    }
    else if( tls_id == OO_SP_INVALID )
      break;
  }

  ci_netif_unlock(&priv->thr->netif);
  return -ENOENT;
}
コード例 #9
0
ファイル: socket.c プロジェクト: bmschwa/openonload
  )

  citp_waitable_reinit(ni, &s->b);
  oo_sock_cplane_init(&s->cp);

  s->s_flags = CI_SOCK_FLAG_CONNECT_MUST_BIND | CI_SOCK_FLAG_PMTU_DO;
  s->s_aflags = 0u;

  ci_assert_equal( 0, CI_IP_DFLT_TOS );
  s->so_priority = 0;

  /* SO_SNDBUF & SO_RCVBUF.  See also ci_tcp_set_established_state() which
   * may modify these values.
   */
  memset(&s->so, 0, sizeof(s->so));
  s->so.sndbuf = NI_OPTS(ni).tcp_sndbuf_def;
  s->so.rcvbuf = NI_OPTS(ni).tcp_rcvbuf_def;

  s->rx_bind2dev_ifindex = CI_IFID_BAD;
  /* These don't really need to be initialised, as only significant when
   * rx_bind2dev_ifindex != CI_IFID_BAD.  But makes stackdump output
   * cleaner this way...
   */
  s->rx_bind2dev_base_ifindex = 0;
  s->rx_bind2dev_vlan = 0;

  s->cmsg_flags = 0u;
  s->timestamping_flags = 0u;
  s->os_sock_status = OO_OS_STATUS_TX;

コード例 #10
0
ファイル: tcp_synrecv.c プロジェクト: bmschwa/openonload
/*! Copy socket options and related fields that should be inherited.
 * Inherits into [ts] from [s] & [c]. Options are inherited during EP
 * promotion for unix, during accept handler in Windows & as a result of
 * setsockopt:SOL_SOCKET:SO_UPDATE_ACCEPT_CONTEXT.  MUST have a lock on
 * [ts].  [or_nonblock] controls whether the non-blocking state from [s]
 * overwrites that in [ts] or is OR'd into it.
 */
static void ci_tcp_inherit_options(ci_netif* ni, ci_sock_cmn* s,
                                   ci_tcp_socket_cmn* c, 
                                   ci_tcp_state* ts, const char* ctxt)
{
  ci_assert(ni);
  ci_assert(s);
  ci_assert(c);
  ci_assert(ts);

  ts->s.so = s->so;
  ts->s.cp.so_bindtodevice = s->cp.so_bindtodevice;
  ts->s.cp.ip_ttl = s->cp.ip_ttl;
  ts->s.rx_bind2dev_ifindex = s->rx_bind2dev_ifindex;
  ts->s.rx_bind2dev_base_ifindex = s->rx_bind2dev_base_ifindex;
  ts->s.rx_bind2dev_vlan = s->rx_bind2dev_vlan;
  ci_tcp_set_sndbuf(ni, ts);      /* eff_mss must be valid */
  ci_tcp_set_rcvbuf(ni, ts);      /* and amss, and rcv_wscl */

  {
    /* NB. We have exclusive access to [ts], so it is safe to manipulate
    ** s_aflags without using bit-ops. */
    unsigned inherited_sflags = CI_SOCK_AFLAG_TCP_INHERITED;
    unsigned inherited_sbflags = 0;

    if( NI_OPTS(ni).accept_inherit_nonblock )
      inherited_sbflags |= CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY;

    ci_assert((ts->s.s_aflags & inherited_sflags) == 0);
    ci_atomic32_or(&ts->s.s_aflags, s->s_aflags & inherited_sflags);

    if( NI_OPTS(ni).tcp_force_nodelay == 1 )
      ci_bit_set(&ts->s.s_aflags, CI_SOCK_AFLAG_NODELAY_BIT);
    else if( NI_OPTS(ni).tcp_force_nodelay == 2 )
      ci_bit_clear(&ts->s.s_aflags, CI_SOCK_AFLAG_NODELAY_BIT);

    ci_assert((ts->s.b.sb_aflags & inherited_sbflags) == 0);
    ci_atomic32_or(&ts->s.b.sb_aflags, s->b.sb_aflags & inherited_sbflags);

    ci_assert_equal((ts->s.s_flags & CI_SOCK_FLAG_TCP_INHERITED),
                    CI_SOCK_FLAG_PMTU_DO);
    ts->s.s_flags &= ~CI_SOCK_FLAG_PMTU_DO;
    ts->s.s_flags |= s->s_flags & CI_SOCK_FLAG_TCP_INHERITED;
  }

  /* Bug1861: while not defined as such, various SOL_TCP/SOL_IP sockopts
   * are inherited in Linux. */
  /* TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_KEEPCNT */
  ts->c.t_ka_time          = c->t_ka_time;
  ts->c.t_ka_time_in_secs  = c->t_ka_time_in_secs;
  ts->c.t_ka_intvl         = c->t_ka_intvl;
  ts->c.t_ka_intvl_in_secs = c->t_ka_intvl_in_secs;
  ts->c.ka_probe_th        = c->ka_probe_th;
  ci_ip_hdr_init_fixed(&ts->s.pkt.ip, IPPROTO_TCP,
                        s->pkt.ip.ip_ttl,
                        s->pkt.ip.ip_tos);
  ts->s.cmsg_flags = s->cmsg_flags;
  ts->s.timestamping_flags = s->timestamping_flags;

  /* Must have set up so.sndbuf */
  ci_tcp_init_rcv_wnd(ts, ctxt);
}
コード例 #11
0
ファイル: tcp_connect.c プロジェクト: davenso/openonload
/* Returns:
 *          0                  on success
 *          
 *          CI_SOCKET_ERROR (and errno set)
 *                             this is a normal error that is returned to the
 *                             the application
 *
 *          CI_SOCKET_HANDOVER we tell the upper layers to handover, no need
 *                             to set errno since it isn't a real error
 */
int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr,
		   socklen_t addrlen, ci_fd_t fd, int *p_moved)
{
  /* Address family is validated earlier. */
  struct sockaddr_in* inaddr = (struct sockaddr_in*) serv_addr;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* ts = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc = 0, crc;
  ci_uint32 dst_be32;

  if( NI_OPTS(ep->netif).tcp_connect_handover )
    return CI_SOCKET_HANDOVER;

  /* Make sure we're up-to-date. */
  ci_netif_lock(ep->netif);
  CHECK_TEP(ep);
  ci_netif_poll(ep->netif);

  /*
   * 1. Check if state of the socket is OK for connect operation.
   */

 start_again:

  if( (rc = ci_tcp_connect_handle_so_error(s)) != 0) {
    CI_SET_ERROR(rc, rc);
    goto unlock_out;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    /* see if progress can be made on this socket before
    ** determining status  (e.g. non-blocking connect and connect poll)*/
    if( s->b.state & CI_TCP_STATE_SYNCHRONISED ) {
      if( ts->tcpflags & CI_TCPT_FLAG_NONBLOCK_CONNECT ) {
        ts->tcpflags &= ~CI_TCPT_FLAG_NONBLOCK_CONNECT;
	rc = 0;
	goto unlock_out;
      }
      if( serv_addr->sa_family == AF_UNSPEC )
        LOG_E(ci_log("Onload does not support TCP disconnect via "

                     "connect(addr->sa_family==AF_UNSPEC)"));
      CI_SET_ERROR(rc, EISCONN);
    }
    else if( s->b.state == CI_TCP_LISTEN ) {
#if CI_CFG_POSIX_CONNECT_AFTER_LISTEN
      CI_SET_ERROR(rc, EOPNOTSUPP);
#else
      if( ci_tcp_validate_sa(s->domain, serv_addr, addrlen) ) {
        /* Request should be forwarded to OS */
        rc = CI_SOCKET_HANDOVER;
	goto unlock_out;
      }
      if( serv_addr->sa_family == AF_UNSPEC ) {
        /* Linux does listen shutdown on disconnect (AF_UNSPEC) */
        ci_netif_unlock(ep->netif);
        rc = ci_tcp_shutdown(ep, SHUT_RD, fd);
	goto out;
      } else {
        /* Linux has curious error reporting in this case */
        CI_SET_ERROR(rc, EISCONN);
      }
#endif
    }
    else {
      /* Socket is in SYN-SENT state. Let's block for receiving SYN-ACK */
      ci_assert_equal(s->b.state, CI_TCP_SYN_SENT);
      if( s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) )
        CI_SET_ERROR(rc, EALREADY);
      else
        goto syn_sent;
    }
    goto unlock_out;
  }

  /* Check if we've ever been connected. */
  if( ts->tcpflags & CI_TCPT_FLAG_WAS_ESTAB ) {
    CI_SET_ERROR(rc, EISCONN);
    goto unlock_out;
  }

  /* 
   * 2. Check address parameter, if it's inappropriate for handover
   *    decision or handover should be done, try to to call OS and
   *    do handover on success.
   */

  if (
    /* Af first, check that address family and length is OK. */
    ci_tcp_validate_sa(s->domain, serv_addr, addrlen)
    /* rfc793 p54 if the foreign socket is unspecified return          */
    /* "error: foreign socket unspecified" (EINVAL), but keep it to OS */
    || (dst_be32 = ci_get_ip4_addr(inaddr->sin_family, serv_addr)) == 0
    /* Zero destination port is tricky as well, keep it to OS */
    || inaddr->sin_port == 0 )
  {
    rc = CI_SOCKET_HANDOVER;
    goto unlock_out;
  }
  
  /* is this a socket that we can handle? */
  rc = ci_tcp_connect_check_dest(ep, dst_be32, inaddr->sin_port);
  if( rc )  goto unlock_out;

  if( (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) &&
      OO_SP_IS_NULL(ts->local_peer) ) {
    /* Try to connect to another stack; handover if can't */
    struct oo_op_loopback_connect op;
    op.dst_port = inaddr->sin_port;
    op.dst_addr = dst_be32;
    /* this operation unlocks netif */
    rc = oo_resource_op(fd, OO_IOC_TCP_LOOPBACK_CONNECT, &op);
    if( rc < 0)
      return CI_SOCKET_HANDOVER;
    if( op.out_moved )
      *p_moved = 1;
    if( op.out_rc == -EINPROGRESS )
      RET_WITH_ERRNO( EINPROGRESS );
    else if( op.out_rc == -EAGAIN )
      return -EAGAIN;
    else if( op.out_rc != 0 )
      return CI_SOCKET_HANDOVER;
    return 0;
  }

  /* filters can't handle alien source address */
  if( (s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN) &&
      ! (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) ) {
    rc = CI_SOCKET_HANDOVER;
    goto unlock_out;
  }

  crc = ci_tcp_connect_ul_start(ep->netif, ts, dst_be32, inaddr->sin_port, &rc);
  if( crc != CI_CONNECT_UL_OK ) {
    switch( crc ) {
    case CI_CONNECT_UL_FAIL:
      goto unlock_out;
    case CI_CONNECT_UL_LOCK_DROPPED:
      goto out;
    case CI_CONNECT_UL_START_AGAIN:
      goto start_again;
    }
  }
  CI_TCP_STATS_INC_ACTIVE_OPENS( ep->netif );

 syn_sent:
  rc = ci_tcp_connect_ul_syn_sent(ep->netif, ts);

 unlock_out:
  ci_netif_unlock(ep->netif);
 out:
  return rc;
}
コード例 #12
0
ファイル: tcp_connect.c プロジェクト: davenso/openonload
static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts,
				   ci_uint32 dst_be32, unsigned dport_be16,
                                   int* fail_rc)
{
  ci_ip_pkt_fmt* pkt;
  int rc = 0;

  ci_assert(ts->s.pkt.mtu);

  /* Now that we know the outgoing route, set the MTU related values.
   * Note, even these values are speculative since the real MTU
   * could change between now and passing the packet to the lower layers
   */
  ts->amss = ts->s.pkt.mtu - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr);
#if CI_CFG_LIMIT_AMSS
  ts->amss = ci_tcp_limit_mss(ts->amss, ni, __FUNCTION__);
#endif

  /* Default smss until discovered by MSS option in SYN - RFC1122 4.2.2.6 */
  ts->smss = CI_CFG_TCP_DEFAULT_MSS;

  /* set pmtu, eff_mss, snd_buf and adjust windows */
  ci_pmtu_set(ni, &ts->pmtus, ts->s.pkt.mtu);
  ci_tcp_set_eff_mss(ni, ts);
  ci_tcp_set_initialcwnd(ni, ts);

  /* Send buffer adjusted by ci_tcp_set_eff_mss(), but we want it to stay
   * zero until the connection is established.
   */
  ts->so_sndbuf_pkts = 0;

  /* 
   * 3. State and address are OK. It's address routed through our NIC.
   *    Do connect().
   */
  ci_assert_nequal(ts->s.pkt.ip.ip_saddr_be32, INADDR_ANY);

  if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) {
    ci_sock_cmn* s = &ts->s;
    ci_uint16 source_be16 = 0;

    if( s->s_flags & CI_SOCK_FLAG_ADDR_BOUND )
      rc = __ci_bind(ni, &ts->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    else 
      rc = __ci_bind(ni, &ts->s, INADDR_ANY, &source_be16);
    if(CI_LIKELY( rc == 0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "connect: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ni, ts),
                 ip_addr_str(INADDR_ANY),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      *fail_rc = rc;
      return CI_CONNECT_UL_FAIL;
    }
    if(CI_UNLIKELY( ts->s.pkt.ip.ip_saddr_be32 == 0 )) {
      CI_SET_ERROR(*fail_rc, EINVAL);
      return CI_CONNECT_UL_FAIL;
    }
  }

  ci_tcp_set_peer(ts, dst_be32, dport_be16);

  /* Make sure we can get a buffer before we change state. */
  pkt = ci_netif_pkt_tx_tcp_alloc(ni);
  if( CI_UNLIKELY(! pkt) ) {
    /* NB. We've already done a poll above. */
    rc = ci_netif_pkt_wait(ni, &ts->s, CI_SLEEP_NETIF_LOCKED|CI_SLEEP_NETIF_RQ);
    if( ci_netif_pkt_wait_was_interrupted(rc) ) {
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_LOCK_DROPPED;
    }
    /* OK, there are (probably) packets available - go try again.  Note we
     * jump back to the top of the function because someone may have
     * connected this socket in the mean-time, so we need to check the
     * state once more.
     */
    return CI_CONNECT_UL_START_AGAIN;
  }

#ifdef ONLOAD_OFE
    if( ni->ofe != NULL )
      ts->s.ofe_code_start = ofe_socktbl_find(
                        ni->ofe, OFE_SOCKTYPE_TCP_ACTIVE,
                        tcp_laddr_be32(ts), tcp_raddr_be32(ts),
                        tcp_lport_be16(ts), tcp_rport_be16(ts));
#endif

  rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice,
                             OO_SP_NULL);
  if( rc < 0 ) {
    /* Perhaps we've run out of filters?  See if we can push a socket out
     * of timewait and steal its filter.
     */
    ci_assert_nequal(rc, -EFILTERSSOME);
    if( rc != -EBUSY || ! ci_netif_timewait_try_to_free_filter(ni) ||
        (rc = ci_tcp_ep_set_filters(ni, S_SP(ts),
                                    ts->s.cp.so_bindtodevice,
                                    OO_SP_NULL)) < 0 ) {
      ci_assert_nequal(rc, -EFILTERSSOME);
      /* Either a different error, or our efforts to free a filter did not
       * work.
       */
      if( ! (ts->s.s_flags & CI_SOCK_FLAG_ADDR_BOUND) ) {
        ts->s.pkt.ip.ip_saddr_be32 = 0;
        ts->s.cp.ip_laddr_be32 = 0;
      }
      ci_netif_pkt_release(ni, pkt);
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_FAIL;
    }
  }

  LOG_TC(log(LNT_FMT "CONNECT %s:%u->%s:%u", LNT_PRI_ARGS(ni, ts),
	     ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
	     (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16),
	     ip_addr_str(ts->s.pkt.ip.ip_daddr_be32),
	     (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_dest_be16)));

  /* We are going to send the SYN - set states appropriately */
  tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
    ci_tcp_initial_seqno(ni);
  ts->snd_max = tcp_snd_nxt(ts) + 1;

  /* Must be after initialising snd_una. */
  ci_tcp_clear_rtt_timing(ts);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_SYN);
  ts->tcpflags &=~ CI_TCPT_FLAG_OPT_MASK;
  ts->tcpflags |= NI_OPTS(ni).syn_opts;

  if( (ts->tcpflags & CI_TCPT_FLAG_WSCL) ) {
    ts->rcv_wscl = ci_tcp_wscl_by_buff(ni, ci_tcp_rcvbuf_established(ni, &ts->s));
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl);
  }
  else {
    ts->rcv_wscl = 0;
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, 0);
  }
  ci_tcp_set_rcvbuf(ni, ts);
  ci_tcp_init_rcv_wnd(ts, "CONNECT");

  /* outgoing_hdrs_len is initialised to include timestamp option. */
  if( ! (ts->tcpflags & CI_TCPT_FLAG_TSO) )
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr)+sizeof(ci_tcp_hdr);
  if( ci_tcp_can_stripe(ni, ts->s.pkt.ip.ip_saddr_be32,
			ts->s.pkt.ip.ip_daddr_be32) )
    ts->tcpflags |= CI_TCPT_FLAG_STRIPE;
  ci_tcp_set_slow_state(ni, ts, CI_TCP_SYN_SENT);

  /* If the app trys to send data on a socket in SYN_SENT state
  ** then the data is queued for send until the SYN gets ACKed.
  ** (rfc793 p56)
  **
  ** Receive calls on the socket should block until data arrives
  ** (rfc793 p58)
  **
  ** Clearing tx_errno and rx_errno acheive this. The transmit window
  ** is set to 1 byte which ensures that only the SYN packet gets
  ** sent until the ACK is received with more window. 
  */
  ci_assert(ts->snd_max == tcp_snd_nxt(ts) + 1);
  ts->s.rx_errno = 0;
  ts->s.tx_errno = 0; 
  ci_tcp_enqueue_no_data(ts, ni, pkt);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);  

  if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) {
    ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
    LOG_TC(log( LNT_FMT "Non-blocking connect - return EINPROGRESS",
		LNT_PRI_ARGS(ni, ts)));
    CI_SET_ERROR(*fail_rc, EINPROGRESS);
    return CI_CONNECT_UL_FAIL;
  }

  return CI_CONNECT_UL_OK;
}
コード例 #13
0
ファイル: tcp_connect.c プロジェクト: davenso/openonload
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /* 
  ** ?? error handling on possible fails not handled robustly...
  ** ?? Need to check port number is valid TODO
  */

  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__, SK_PRI_ARGS(ep), 
             backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;
  if( !NI_OPTS(netif).tcp_server_loopback) {
    /* We should handover if the socket is bound to alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  if( ul_backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  if( s->b.state == CI_TCP_LISTEN ) {
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }


  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if socket used for a previous, failed, connect then the error
   * numbers will not be as expected.  Only seen when not using listening
   * netifs (as moving the EP to the new netif resets them). 
   */

  ts->s.tx_errno = EPIPE;



  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( !(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose 
     * a port.  As we haven't been given a hint we let the OS choose. */

    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    if (CI_LIKELY( rc==0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));

    } else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  } 

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);



  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* setup listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on error path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* install all the filters needed for this connection 
   *    - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   *  TODO: handle BINDTODEVICE by setting phys_port paramter to correct 
   *        physical L5 port index
   *  TODO: handle REUSEADDR by setting last paramter to TRUE
   */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    if( rc == -EFILTERSSOME ) {
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters  returned %d", __FUNCTION__, rc));
    if (rc < 0) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }


  /* 
   * Call of system listen() is required for listen any, local host
   * communications server and multi-homed server (to accept connections
   * to L5 assigned address(es), but incoming from other interfaces).
   */
#ifdef __ci_driver__
  {
    rc = efab_tcp_helper_listen_os_sock( netif2tcp_helper_resource(netif),
					 S_SP(tls), backlog);
  }
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if ( rc < 0 ) {
    /* clear the filter we've just set */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* revert TCP state to a non-listening socket format */
  __ci_tcp_listen_to_normal(netif, tls);
  /* Above function sets orphan flag but we are attached to an FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}
コード例 #14
0
/* This function must be called with netif lock not held and it always
 * returns with the netif lock not held.
 */
int efab_tcp_helper_reuseport_bind(ci_private_t *priv, void *arg)
{
  oo_tcp_reuseport_bind_t* trb = arg;
  ci_netif* ni = &priv->thr->netif;
  tcp_helper_cluster_t* thc;
  tcp_helper_resource_t* thr = NULL;
  citp_waitable* waitable;
  ci_sock_cmn* sock = SP_TO_SOCK(ni, priv->sock_id);
  struct oof_manager* fm = efab_tcp_driver.filter_manager;
  struct oof_socket* oofilter;
  struct oof_socket dummy_oofilter;
  int protocol = thc_get_sock_protocol(sock);
  char name[CI_CFG_CLUSTER_NAME_LEN + 1];
  int rc, rc1;
  int flags = 0;
  tcp_helper_cluster_t* named_thc,* ported_thc;
  int alloced = 0;

  /* No clustering on sockets bound to alien addresses */
  if( sock->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
    return 0;

  if( NI_OPTS(ni).cluster_ignore == 1 ) {
    LOG_NV(ci_log("%s: Ignored attempt to use clusters due to "
                  "EF_CLUSTER_IGNORE option.", __FUNCTION__));
    return 0;
  }

  if( trb->port_be16 == 0 ) {
    ci_log("%s: Reuseport on port=0 is not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( trb->cluster_size < 2 ) {
    ci_log("%s: Cluster sizes < 2 are not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( sock->s_flags & (CI_SOCK_FLAG_TPROXY | CI_SOCK_FLAG_MAC_FILTER) ) {
    ci_log("%s: Scalable filter sockets cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  oofilter = &ci_trs_ep_get(priv->thr, priv->sock_id)->oofilter;

  if( oofilter->sf_local_port != NULL ) {
    ci_log("%s: Socket that already have filter cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  if( priv->thr->thc ) {
    /* Reserve proto:port[:ip] until bind (or close)*/
    rc = oof_socket_add(fm, oofilter,
                       OOF_SOCKET_ADD_FLAG_CLUSTERED |
                       OOF_SOCKET_ADD_FLAG_DUMMY,
                       protocol, trb->addr_be32, trb->port_be16, 0, 0,
                       &ported_thc);
    if( rc > 0 )
      rc = 0;
    if( rc == 0 )
      sock->s_flags |= CI_SOCK_FLAG_FILTER;
    return rc;
  }

  mutex_lock(&thc_init_mutex);
  /* We are going to be iterating over clusters, make sure they don't
   * change.
   */
  mutex_lock(&thc_mutex);

  /* Lookup a suitable cluster to use */

  /* We try to add dummy filter to oof to reserve proto:port[:ip] tuple,
   * if there is already a cluster at the tuple we will get reference to it,
   */
  oof_socket_ctor(&dummy_oofilter);
  rc = oof_socket_add(fm, &dummy_oofilter,
                      OOF_SOCKET_ADD_FLAG_CLUSTERED |
                      OOF_SOCKET_ADD_FLAG_DUMMY |
                      OOF_SOCKET_ADD_FLAG_NO_STACK,
                      protocol, trb->addr_be32, trb->port_be16, 0, 0,
                      &ported_thc);
  if( rc < 0 ) /* non-clustered socket on the tuple */
    goto alloc_fail0;

  if( ! gen_cluster_name(trb->cluster_name, name) ) {
    /* user requested a cluster by name.  But we need to make sure
     * that the oof_local_port that the user is interested in is not
     * being used by another cluster.  We search for cluster by name
     * and use results of prior protp:port[:ip] search oof_local_port
     * to then do some sanity checking.
     */
    rc1 = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                            &named_thc);
    if( rc1 < 0 ) {
      rc = rc1;
      goto alloc_fail;
    }

    if( rc1 == 0 ) {
      if( rc == 1 ) {
        /* search by oof_local_port found a cluster which search by
         * name didn't find. */
        LOG_E(ci_log("Error: Cluster with requested name %s already "
                     "bound to %s", name, ported_thc->thc_name));
        rc = -EEXIST;
        goto alloc_fail;
      }
      else {
        /* Neither searches found a cluster.  So allocate one below.
         */
      }
    }
    else {
      if( rc == 1 ) {
        /* Both searches found clusters.  Fine if they are the same or
         * else error. */
        if( named_thc != ported_thc ) {
          LOG_E(ci_log("Error: Cluster %s does not handle socket %s:%d.  "
                       "Cluster %s does", name, FMT_PROTOCOL(protocol),
                       trb->port_be16, named_thc->thc_name));
          rc = -EEXIST;
          goto alloc_fail;
        }
      }
      /* Search by name found a cluster no conflict with search by tuple
       * (the ported cluster is either none or the same as named)*/
      thc = named_thc;
      goto cont;
    }
  }
  else {
    /* No cluster name requested.  We have already looked for a cluster handling
     * the tuple.  If none found, then try to use an existing
     * cluster this process created.  If none found, then allocate one.
     */
    /* If rc == 0, then no cluster found - try to allocate one.
     * If rc == 1, we found cluster - make sure that euids match and continue. */
    if( rc == 1 ) {
      thc = ported_thc;
      if( thc->thc_euid != ci_geteuid() ) {
        rc = -EADDRINUSE;
        goto alloc_fail;
      }
      goto cont;
    }
    rc = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                            &thc);
    if( rc < 0 )
      goto alloc_fail;
    if( rc == 1 )
      goto cont;
  }
  /* When an interface is in tproxy mode, all clustered listening socket
   * are assumed to be part of tproxy passive side.  This requires
   * rss context to use altered rss hashing based solely on src ip:port.
   */
  flags = tcp_helper_cluster_thc_flags(&NI_OPTS(ni));

  if( (rc = thc_alloc(name, protocol, trb->port_be16, ci_geteuid(),
                      trb->cluster_size, flags, &thc)) != 0 )
      goto alloc_fail;

  alloced = 1;

 cont:
  tcp_helper_cluster_ref(thc);

  /* At this point we have our cluster with one additional reference */

  /* Find a suitable stack within the cluster to use */
  rc = thc_get_thr(thc, &dummy_oofilter, &thr);
  if( rc != 0 )
    rc = thc_alloc_thr(thc, trb->cluster_restart_opt,
                       &ni->opts, ni->flags, &thr);

  /* If get or alloc succeeded thr holds reference to the cluster,
   * so the cluster cannot go away.  We'll drop our reference and also
   * will not be accessing state within the cluster anymore so we can
   * drop the lock. */
  mutex_unlock(&thc_mutex);

  if( alloced && rc == 0 && (flags & THC_FLAG_TPROXY) != 0 ) {
    /* Tproxy filter is allocated as late as here,
     * the reason is that this needs to be preceded by stack allocation
     * (firmware needs initialized vi) */
    rc = thc_install_tproxy(thc, NI_OPTS(ni).scalable_filter_ifindex);
    if( rc != 0 )
      efab_thr_release(thr);
  }

  tcp_helper_cluster_release(thc, NULL);

  if( rc != 0 ) {
    oof_socket_del(fm, &dummy_oofilter);
    goto alloc_fail_unlocked;
  }

  /* We have thr and we hold single reference to it. */

  /* Move the socket into the new stack */
  if( (rc = ci_netif_lock(ni)) != 0 )
    goto drop_and_done;
  waitable = SP_TO_WAITABLE(ni, priv->sock_id);
  rc = ci_sock_lock(ni, waitable);
  if( rc != 0 ) {
    ci_netif_unlock(ni);
    goto drop_and_done;
  }
  /* thr referencing scheme comes from efab_file_move_to_alien_stack_rsop */
  efab_thr_ref(thr);
  rc = efab_file_move_to_alien_stack(priv, &thr->netif, 0);
  if( rc != 0 )
    efab_thr_release(thr);
  else {
    /* beside us, socket now holds its own reference to thr */
    oofilter = &ci_trs_ep_get(thr, sock->b.moved_to_sock_id)->oofilter;
    oof_socket_replace(fm, &dummy_oofilter, oofilter);
    SP_TO_SOCK(&thr->netif, sock->b.moved_to_sock_id)->s_flags |= CI_SOCK_FLAG_FILTER;
    ci_netif_unlock(&thr->netif);
  }

 drop_and_done:
  if( rc != 0 )
    oof_socket_del(fm, &dummy_oofilter);
  /* Drop the reference we got from thc_get_thr or thc_alloc_thr().
   * If things went wrong both stack and cluster might disappear. */
  efab_thr_release(thr);
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;

 alloc_fail:
  oof_socket_del(fm, &dummy_oofilter);
 alloc_fail0:
  mutex_unlock(&thc_mutex);
 alloc_fail_unlocked:
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;
}
コード例 #15
0
/* Allocates a new stack in thc.
 *
 * You need to efab_thr_release() the stack returned by this function
 * when done.
 *
 * You must hold the thc_mutex before calling this function.
 */
static int thc_alloc_thr(tcp_helper_cluster_t* thc,
                         int cluster_restart_opt,
                         const ci_netif_config_opts* ni_opts,
                         int ni_flags,
                         tcp_helper_resource_t** thr_out)
{
  int rc;
  tcp_helper_resource_t* thr_walk;
  ci_resource_onload_alloc_t roa;
  ci_netif_config_opts* opts;
  ci_netif* netif;

  memset(&roa, 0, sizeof(roa));

  if( (rc = thc_get_next_thr_name(thc, roa.in_name)) != 0 ) {
    /* All stack names taken i.e. cluster is full.  Based on setting
     * of cluster_restart_opt, either kill a orphan or return error. */
    if( thc_has_orphans(thc) == 1 ) {
      /* Checking for CITP_CLUSTER_RESTART_TERMINATE_ORPHANS */
      if( cluster_restart_opt == 1 ) {
        thc_kill_an_orphan(thc);
        rc = thc_get_next_thr_name(thc, roa.in_name);
        ci_assert_equal(rc, 0);
      }
      else {
        LOG_E(ci_log("%s: Clustered stack creation failed because of "
                     "orphans.  Either try again later or use "
                     "EF_CLUSTER_RESTART", __FUNCTION__));
        return rc;
      }
    }
    else {
      LOG_E(ci_log("%s: Stack creation failed because all instances in "
                   "cluster already allocated.", __FUNCTION__));
      return rc;
    }
  }
  roa.in_flags = ni_flags;
  strncpy(roa.in_version, ONLOAD_VERSION, sizeof(roa.in_version));
  strncpy(roa.in_uk_intf_ver, oo_uk_intf_ver, sizeof(roa.in_uk_intf_ver));
  if( (opts = kmalloc(sizeof(*opts), GFP_KERNEL)) == NULL )
    return -ENOMEM;
  memcpy(opts, ni_opts, sizeof(*opts));
  rc = tcp_helper_rm_alloc(&roa, opts, -1, thc, &thr_walk);
  kfree(opts);
  if( rc != 0 )
    return rc;

  /* Do not allow clustered stacks to do TCP loopback. */
  netif = &thr_walk->netif;
  if( NI_OPTS(netif).tcp_server_loopback != CITP_TCP_LOOPBACK_OFF ||
      NI_OPTS(netif).tcp_client_loopback != CITP_TCP_LOOPBACK_OFF )
    ci_log("%s: Disabling Unsupported TCP loopback on clustered stack.",
           __FUNCTION__);
  NI_OPTS(netif).tcp_server_loopback = NI_OPTS(netif).tcp_client_loopback =
    CITP_TCP_LOOPBACK_OFF;

  thr_walk->thc_tid      = current->pid;
  thr_walk->thc          = thc;
  thr_walk->thc_thr_next = thc->thc_thr_head;
  thc->thc_thr_head      = thr_walk;

  if( (thr_walk->thc->thc_flags & THC_FLAG_TPROXY) != 0 )
    netif->state->flags |= CI_NETIF_FLAG_SCALABLE_FILTERS_RSS;

  oo_atomic_inc(&thc->thc_ref_count);
  *thr_out = thr_walk;
  return 0;
}