Exemplo n.º 1
0
/* Find the socket registered in the filter table for the given tuple.
 *
 * An exact (full 4-tuple) lookup is tried first.  If that misses, the
 * lookup is repeated with the remote address and port zeroed, i.e. as a
 * wildcard match on the local address/port only.
 *
 * Ports are logged through CI_BSWAP_BE16(), so they are expected in
 * big-endian (network) order.
 *
 * Returns the matching socket, or NULL when neither lookup finds an entry.
 */
ci_sock_cmn* __ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, 
				      unsigned lport, unsigned raddr, 
				      unsigned rport, unsigned protocol)
{
  int slot;

  /* First attempt: exact match on the full tuple. */
  slot = ci_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol);
  LOG_NV(log(LPF "FULL LOOKUP %s:%u->%s:%u rc=%d",
	     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	     slot));

  if( ! CI_LIKELY( slot >= 0 ) ) {
    /* Second attempt: wildcard the remote end of the tuple. */
    raddr = rport = 0;
    slot = ci_netif_filter_lookup(netif, laddr, lport, raddr, rport,
                                  protocol);
    LOG_NV(log(LPF "WILD LOOKUP %s:%u->%s:%u rc=%d",
	       ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	       ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	       slot));

    if( ! CI_LIKELY( slot >= 0 ) )
      return NULL;
  }

  /* A non-negative result is an index into the filter table. */
  return ID_TO_SOCK(netif, netif->filter_table->table[slot].id);
}
Exemplo n.º 2
0
/* Invoke a callback on every socket in the filter table that matches the
 * given tuple.
 *
 * The table is probed starting at tcp_hash1() and stepping by tcp_hash2()
 * (modulo the table size).  For each occupied slot whose socket matches
 * laddr/lport/raddr/rport/protocol, and which is either not bound to a
 * device (rx_bind2dev_ifindex == CI_IFID_BAD) or passes
 * ci_sock_intf_check() for intf_i/vlan, callback(sock, callback_arg) is
 * called.  Iteration stops when:
 *   - the callback returns non-zero,
 *   - an EMPTY slot is reached, or
 *   - the probe sequence wraps round to its starting slot.
 * Slots with a negative id other than EMPTY are skipped.
 *
 * If hash_out is non-NULL it receives tcp_hash3() of the tuple.
 */
void ci_netif_filter_for_each_match(ci_netif* ni, unsigned laddr,
                                    unsigned lport, unsigned raddr,
                                    unsigned rport, unsigned protocol,
                                    int intf_i, int vlan,
                                    int (*callback)(ci_sock_cmn*, void*),
                                    void* callback_arg, ci_uint32* hash_out)
{
  ci_netif_filter_table* tbl = ni->filter_table;
  unsigned slot;
  unsigned step = 0;
  unsigned start;

  if( hash_out != NULL )
    *hash_out = tcp_hash3(tbl, laddr, lport, raddr, rport, protocol);

  start = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  slot = start;

  LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u",
             __FUNCTION__, CI_IP_PROTOCOL_STR(protocol),
	     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	     start, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
	     slot));

  for( ; ; ) {
    int id = tbl->table[slot].id;

    if(CI_LIKELY( id >= 0 )) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);

      /* The tuple comparison ORs together the field differences so that
       * it is a single test; the result is zero only if every field
       * matches.  The device check and the callback are evaluated only
       * for matching sockets, in that order. */
      if( ((laddr    - tbl->table[slot].laddr) |
           (lport    - sock_lport_be16(s)    ) |
           (raddr    - sock_raddr_be32(s)    ) |
           (rport    - sock_rport_be16(s)    ) |
           (protocol - sock_protocol(s)      )) == 0 &&
          CI_LIKELY( (s->rx_bind2dev_ifindex == CI_IFID_BAD ||
                      ci_sock_intf_check(ni, s, intf_i, vlan)) ) &&
          callback(s, callback_arg) != 0 ) {
        return;
      }
    }
    else if( id == EMPTY ) {
      break;
    }

    /* The secondary hash is only computed once we know we need to move
     * off the first slot, keeping the common case cheap. */
    if( slot == start )
      step = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);

    slot = (slot + step) & tbl->table_size_mask;

    if( slot == start ) {
      /* Visited the whole probe sequence without hitting an EMPTY slot. */
      LOG_NV(ci_log(FN_FMT "ITERATE FULL %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(ni), ip_addr_str(laddr), lport,
		   ip_addr_str(raddr), rport, slot, step));
      break;
    }
  }
}
Exemplo n.º 3
0
/* Open and remember one driver handle per device type.
 *
 * For every device type that does not yet have a saved handle, a handle
 * is opened with close-on-exec requested and stored in saved_fd[], with
 * fd_is_saved[] set to mark it (the handle is kept "for cloning", per the
 * log message).  If the device number for that type has not been cached
 * in oo_st_rdev[] yet, it is obtained from the freshly opened handle.
 *
 * Failures to open a device are logged and otherwise ignored; the
 * function is best-effort and returns nothing.
 */
void ef_driver_save_fd(void)
{
  enum oo_device_type dev_type;

  for( dev_type = 0; dev_type < OO_MAX_DEV; dev_type++ ) {
    ef_driver_handle fd;
    int rc;

    if( fd_is_saved[dev_type] )
      continue;

    rc = ef_onload_driver_open(&fd, dev_type, 1);
    if( rc != 0 ) {
      ci_log("%s: failed to open %s - rc=%d",
             __func__, oo_device_name[dev_type], rc);
      continue;
    }

    saved_fd[dev_type] = fd;
    fd_is_saved[dev_type] = 1;
    LOG_NV(ci_log("%s: Saved fd %d %s for cloning",
                  __func__, (int)fd, oo_device_name[dev_type]));

    if( oo_st_rdev[dev_type] <= 0 ) {
      struct stat st;
      /* Only cache the device number if fstat() actually filled in st;
       * otherwise st is uninitialised and we would cache garbage.  On
       * failure the cached value is left as it was. */
      if( fstat(fd, &st) == 0 ) {
        oo_st_rdev[dev_type] = st.st_rdev;
      }
      else {
        LOG_NV(ci_log("%s: fstat of %s failed",
                      __func__, oo_device_name[dev_type]));
      }
    }
  }
}
Exemplo n.º 4
0
unsigned long oo_get_st_rdev(enum oo_device_type dev_type)
{
  if( oo_st_rdev[dev_type] == 0 ) {
    struct stat st;
    if( stat(oo_device_name[dev_type], &st) == 0 )
      oo_st_rdev[dev_type] = st.st_rdev;
    else {
      LOG_NV(ci_log("%s: ERROR: stats(%s) failed errno=%d",
                    __func__, oo_device_name[dev_type], errno));
      oo_st_rdev[dev_type] = -1;
    }
  }
  return oo_st_rdev[dev_type];
}
Exemplo n.º 5
0
/* Look up the filter-table slot holding the socket that matches the given
 * tuple exactly.
 *
 * The caller must hold the netif lock (asserted).  The table is probed
 * starting at tcp_hash1() and stepping by tcp_hash2() modulo the table
 * size.
 *
 * Returns:
 *   >= 0     index of the matching slot in netif->filter_table->table
 *   -ENOENT  an EMPTY slot was reached before any match
 *   -ELOOP   the probe sequence wrapped round to its start without
 *            finding a match or an EMPTY slot
 */
int ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport,
			   unsigned raddr, unsigned rport, unsigned protocol)
{
  ci_netif_filter_table* tbl;
  unsigned slot;
  unsigned step = 0;
  unsigned start;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);

  tbl = netif->filter_table;
  start = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  slot = start;

  LOG_NV(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u",
	     CI_IP_PROTOCOL_STR(protocol),
	     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	     start, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
	     slot));

  for( ; ; ) {
    int id = tbl->table[slot].id;

    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(netif, id);

      /* OR of the per-field differences is zero only when all fields
       * match. */
      if( ((laddr    - tbl->table[slot].laddr) |
           (lport    - sock_lport_be16(s)    ) |
           (raddr    - sock_raddr_be32(s)    ) |
           (rport    - sock_rport_be16(s)    ) |
           (protocol - sock_protocol(s)      )) == 0 ) {
        return slot;
      }
    }

    if( id == EMPTY )
      break;

    /* Compute the secondary hash lazily: it is only needed once the first
     * slot has turned out not to match. */
    if( slot == start )
      step = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);

    slot = (slot + step) & tbl->table_size_mask;

    if( slot == start ) {
      LOG_E(ci_log(FN_FMT "ERROR: LOOP %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(netif), ip_addr_str(laddr), lport,
		   ip_addr_str(raddr), rport, slot, step));
      return -ELOOP;
    }
  }

  return -ENOENT;
}
Exemplo n.º 6
0
/* Move a driver handle to a descriptor number at or above
 * CITP_OPTS.fd_base.
 *
 * The handle is duplicated (with close-on-exec set on the duplicate when
 * do_cloexec is non-zero).  On success the original descriptor is closed
 * and *pfd is updated to the new one.  On failure *pfd is left untouched.
 *
 * Returns 0 on success, or the negative result of the failed duplication.
 */
int ef_onload_handle_move_and_do_cloexec(ef_driver_handle* pfd, int do_cloexec)
{
  int new_fd = do_cloexec
    ? oo_fcntl_dupfd_cloexec(*pfd, CITP_OPTS.fd_base)
    : ci_sys_fcntl(*pfd, F_DUPFD, CITP_OPTS.fd_base);

  if( new_fd < 0 ) {
    LOG_NV(ci_log("%s: Failed to move fd from %d, rc %d",
                  __func__, *pfd, new_fd));
    return new_fd;
  }

  /* The duplicate already carries CLOEXEC if it was requested, so all
   * that remains is to retire the old descriptor. */
  ci_tcp_helper_close_no_trampoline(*pfd);
  *pfd = new_fd;
  return 0;
}
Exemplo n.º 7
0
/* Open a handle to an onload device.
 *
 * pfd         receives the new handle on success
 * dev_type    which device to open
 * do_cloexec  non-zero to request close-on-exec on the new handle
 *
 * The device is opened with oo_open().  If that fails for a reason other
 * than EMFILE and a handle for this device type has previously been saved
 * (fd_is_saved[]), a new handle is cloned from the saved one via ioctl;
 * errno is restored to its value on entry if the clone succeeds.
 *
 * The resulting handle is moved to CITP_OPTS.fd_base or above when
 * possible, and close-on-exec is applied explicitly if O_CLOEXEC is not
 * available or was observed not to take effect.
 *
 * Returns 0 on success, otherwise the non-zero result of the failed open
 * or clone.
 */
int ef_onload_driver_open(ef_driver_handle* pfd,
                          enum oo_device_type dev_type,
                          int do_cloexec)
{
  int rc;
  int flags = 0;
  int saved_errno = errno;

#ifdef O_CLOEXEC
  if( do_cloexec )
    flags = O_CLOEXEC;
#endif

  ci_assert(pfd);
  rc = oo_open(pfd, dev_type, flags);
  /* Only fall back to cloning when a handle has really been saved:
   * fd_is_saved[] is a 0/1 flag, so it must be tested for truth (a ">= 0"
   * test is always true and would issue the ioctl on an unsaved fd). */
  if( rc != 0 && errno != EMFILE && fd_is_saved[dev_type] ) {
    ci_clone_fd_t op;
    op.do_cloexec = do_cloexec;
    LOG_NV(ci_log("%s: open failed, but cloning from saved fd", __func__));
    rc = ci_sys_ioctl((ci_fd_t) saved_fd[dev_type],
                      clone_ioctl[dev_type], &op);
    if( rc < 0 )
      return rc;
    errno = saved_errno;
    *pfd = op.fd;
  }

  if( rc != 0 )
    return rc;

  /* Our internal driver handles are not visible to the application.  It may
   * make assumptions about the fd space available to it, and try to dup2/3
   * onto one of our driver fds.  To try and minimise this we allow the user
   * to specify a minimum value for us to use, to try and keep out of their
   * way.
   *
   * We have to be able to cope with them coming along and trying to dup onto
   * one of these fds anyway, as they may not have set the option up.  As such
   * we treat failure to shift the fd as acceptable, and just retain the old
   * one.
   */
  if( *pfd < CITP_OPTS.fd_base )
    if( ef_onload_handle_move_and_do_cloexec(pfd, do_cloexec) == 0 )
      return 0;
      
  if( do_cloexec ) {
#if defined(O_CLOEXEC)
    /* Probe once whether O_CLOEXEC actually took effect on a handle we
     * opened; the answer is cached for subsequent calls. */
    static int o_cloexec_fails = -1;
    if( o_cloexec_fails < 0 ) {
      /* F_GETFD reports the descriptor flags through its return value
       * (negative on error); it does not write through its argument. */
      rc = ci_sys_fcntl(*(int *)pfd, F_GETFD, 0);
      if( rc >= 0 && (rc & FD_CLOEXEC) )
        o_cloexec_fails = 0;
      else
        o_cloexec_fails = 1;
    }
#else
    static const int o_cloexec_fails = 1;
#endif
    if( o_cloexec_fails )
      CI_DEBUG_TRY(ci_sys_fcntl(*(int *)pfd, F_SETFD, FD_CLOEXEC));
  }

  return 0;
}
Exemplo n.º 8
0
/* Handle a reuseport bind request: find or allocate a cluster for the
 * requested proto:port[:ip], find or allocate a stack within that cluster,
 * and move the socket into that stack.
 *
 * priv  identifies the calling stack (priv->thr) and socket (priv->sock_id)
 * arg   points to an oo_tcp_reuseport_bind_t describing the request
 *
 * Returns 0 on success, and also when clustering is deliberately skipped
 * (socket bound to an alien address, or the cluster_ignore option is set).
 * Returns a negative error code on failure.
 *
 * If the calling stack already belongs to a cluster, only a dummy filter
 * is added to reserve the tuple and the socket is not moved.
 *
 * Locking: takes thc_init_mutex and then thc_mutex; both are released on
 * every path that took them.
 *
 * This function must be called with netif lock not held and it always
 * returns with the netif lock not held.
 */
int efab_tcp_helper_reuseport_bind(ci_private_t *priv, void *arg)
{
  oo_tcp_reuseport_bind_t* trb = arg;
  ci_netif* ni = &priv->thr->netif;
  tcp_helper_cluster_t* thc;
  tcp_helper_resource_t* thr = NULL;
  citp_waitable* waitable;
  ci_sock_cmn* sock = SP_TO_SOCK(ni, priv->sock_id);
  struct oof_manager* fm = efab_tcp_driver.filter_manager;
  struct oof_socket* oofilter;
  struct oof_socket dummy_oofilter;
  int protocol = thc_get_sock_protocol(sock);
  char name[CI_CFG_CLUSTER_NAME_LEN + 1];
  int rc, rc1;
  int flags = 0;
  tcp_helper_cluster_t* named_thc,* ported_thc;
  int alloced = 0;

  /* No clustering on sockets bound to alien addresses */
  if( sock->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
    return 0;

  if( NI_OPTS(ni).cluster_ignore == 1 ) {
    LOG_NV(ci_log("%s: Ignored attempt to use clusters due to "
                  "EF_CLUSTER_IGNORE option.", __FUNCTION__));
    return 0;
  }

  if( trb->port_be16 == 0 ) {
    ci_log("%s: Reuseport on port=0 is not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( trb->cluster_size < 2 ) {
    ci_log("%s: Cluster sizes < 2 are not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( sock->s_flags & (CI_SOCK_FLAG_TPROXY | CI_SOCK_FLAG_MAC_FILTER) ) {
    ci_log("%s: Scalable filter sockets cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  oofilter = &ci_trs_ep_get(priv->thr, priv->sock_id)->oofilter;

  if( oofilter->sf_local_port != NULL ) {
    ci_log("%s: Socket that already have filter cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  /* The calling stack is already part of a cluster: just reserve the
   * tuple with a dummy filter on the socket's own oofilter. */
  if( priv->thr->thc ) {
    /* Reserve proto:port[:ip] until bind (or close)*/
    rc = oof_socket_add(fm, oofilter,
                       OOF_SOCKET_ADD_FLAG_CLUSTERED |
                       OOF_SOCKET_ADD_FLAG_DUMMY,
                       protocol, trb->addr_be32, trb->port_be16, 0, 0,
                       &ported_thc);
    /* A positive result is treated as success here. */
    if( rc > 0 )
      rc = 0;
    if( rc == 0 )
      sock->s_flags |= CI_SOCK_FLAG_FILTER;
    return rc;
  }

  mutex_lock(&thc_init_mutex);
  /* We are going to be iterating over clusters, make sure they don't
   * change.
   */
  mutex_lock(&thc_mutex);

  /* Lookup a suitable cluster to use */

  /* We try to add dummy filter to oof to reserve proto:port[:ip] tuple,
   * if there is already a cluster at the tuple we will get reference to it,
   */
  oof_socket_ctor(&dummy_oofilter);
  rc = oof_socket_add(fm, &dummy_oofilter,
                      OOF_SOCKET_ADD_FLAG_CLUSTERED |
                      OOF_SOCKET_ADD_FLAG_DUMMY |
                      OOF_SOCKET_ADD_FLAG_NO_STACK,
                      protocol, trb->addr_be32, trb->port_be16, 0, 0,
                      &ported_thc);
  if( rc < 0 ) /* non-clustered socket on the tuple */
    goto alloc_fail0;

  /* From here on, rc == 1 means the tuple is already owned by the cluster
   * returned in ported_thc, and rc == 0 means no cluster owns it. */

  if( ! gen_cluster_name(trb->cluster_name, name) ) {
    /* user requested a cluster by name.  But we need to make sure
     * that the oof_local_port that the user is interested in is not
     * being used by another cluster.  We search for cluster by name
     * and use results of prior proto:port[:ip] search oof_local_port
     * to then do some sanity checking.
     */
    rc1 = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                            &named_thc);
    if( rc1 < 0 ) {
      rc = rc1;
      goto alloc_fail;
    }

    if( rc1 == 0 ) {
      if( rc == 1 ) {
        /* search by oof_local_port found a cluster which search by
         * name didn't find. */
        LOG_E(ci_log("Error: Cluster with requested name %s already "
                     "bound to %s", name, ported_thc->thc_name));
        rc = -EEXIST;
        goto alloc_fail;
      }
      else {
        /* Neither searches found a cluster.  So allocate one below.
         */
      }
    }
    else {
      if( rc == 1 ) {
        /* Both searches found clusters.  Fine if they are the same or
         * else error. */
        if( named_thc != ported_thc ) {
          /* The cluster that does own the tuple is the one found by the
           * tuple search (ported_thc), not the one found by name. */
          LOG_E(ci_log("Error: Cluster %s does not handle socket %s:%d.  "
                       "Cluster %s does", name, FMT_PROTOCOL(protocol),
                       trb->port_be16, ported_thc->thc_name));
          rc = -EEXIST;
          goto alloc_fail;
        }
      }
      /* Search by name found a cluster no conflict with search by tuple
       * (the ported cluster is either none or the same as named)*/
      thc = named_thc;
      goto cont;
    }
  }
  else {
    /* No cluster name requested.  We have already looked for a cluster handling
     * the tuple.  If none found, then try to use an existing
     * cluster this process created.  If none found, then allocate one.
     */
    /* If rc == 0, then no cluster found - try to allocate one.
     * If rc == 1, we found cluster - make sure that euids match and continue. */
    if( rc == 1 ) {
      thc = ported_thc;
      if( thc->thc_euid != ci_geteuid() ) {
        rc = -EADDRINUSE;
        goto alloc_fail;
      }
      goto cont;
    }
    rc = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                            &thc);
    if( rc < 0 )
      goto alloc_fail;
    if( rc == 1 )
      goto cont;
  }
  /* When an interface is in tproxy mode, all clustered listening socket
   * are assumed to be part of tproxy passive side.  This requires
   * rss context to use altered rss hashing based solely on src ip:port.
   */
  flags = tcp_helper_cluster_thc_flags(&NI_OPTS(ni));

  if( (rc = thc_alloc(name, protocol, trb->port_be16, ci_geteuid(),
                      trb->cluster_size, flags, &thc)) != 0 )
    goto alloc_fail;

  alloced = 1;

 cont:
  tcp_helper_cluster_ref(thc);

  /* At this point we have our cluster with one additional reference */

  /* Find a suitable stack within the cluster to use */
  rc = thc_get_thr(thc, &dummy_oofilter, &thr);
  if( rc != 0 )
    rc = thc_alloc_thr(thc, trb->cluster_restart_opt,
                       &ni->opts, ni->flags, &thr);

  /* If get or alloc succeeded thr holds reference to the cluster,
   * so the cluster cannot go away.  We'll drop our reference and also
   * will not be accessing state within the cluster anymore so we can
   * drop the lock. */
  mutex_unlock(&thc_mutex);

  if( alloced && rc == 0 && (flags & THC_FLAG_TPROXY) != 0 ) {
    /* Tproxy filter is allocated as late as here,
     * the reason is that this needs to be preceded by stack allocation
     * (firmware needs initialized vi) */
    rc = thc_install_tproxy(thc, NI_OPTS(ni).scalable_filter_ifindex);
    if( rc != 0 )
      efab_thr_release(thr);
  }

  tcp_helper_cluster_release(thc, NULL);

  if( rc != 0 ) {
    /* thc_mutex has already been dropped, so use the unlocked exit. */
    oof_socket_del(fm, &dummy_oofilter);
    goto alloc_fail_unlocked;
  }

  /* We have thr and we hold single reference to it. */

  /* Move the socket into the new stack */
  if( (rc = ci_netif_lock(ni)) != 0 )
    goto drop_and_done;
  waitable = SP_TO_WAITABLE(ni, priv->sock_id);
  rc = ci_sock_lock(ni, waitable);
  if( rc != 0 ) {
    ci_netif_unlock(ni);
    goto drop_and_done;
  }
  /* thr referencing scheme comes from efab_file_move_to_alien_stack_rsop */
  efab_thr_ref(thr);
  rc = efab_file_move_to_alien_stack(priv, &thr->netif, 0);
  if( rc != 0 )
    efab_thr_release(thr);
  else {
    /* beside us, socket now holds its own reference to thr */
    oofilter = &ci_trs_ep_get(thr, sock->b.moved_to_sock_id)->oofilter;
    /* Hand the tuple reservation over from the dummy filter to the
     * socket's filter in its new stack. */
    oof_socket_replace(fm, &dummy_oofilter, oofilter);
    SP_TO_SOCK(&thr->netif, sock->b.moved_to_sock_id)->s_flags |= CI_SOCK_FLAG_FILTER;
    ci_netif_unlock(&thr->netif);
  }

 drop_and_done:
  if( rc != 0 )
    oof_socket_del(fm, &dummy_oofilter);
  /* Drop the reference we got from thc_get_thr or thc_alloc_thr().
   * If things went wrong both stack and cluster might disappear. */
  efab_thr_release(thr);
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;

  /* Error exits, ordered so that each label undoes one more step than the
   * label below it. */
 alloc_fail:
  oof_socket_del(fm, &dummy_oofilter);
 alloc_fail0:
  mutex_unlock(&thc_mutex);
 alloc_fail_unlocked:
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;
}